Commit ccadee9b authored by Laurent Pinchart

dmaengine: rcar-dmac: Implement support for hardware descriptor lists



The DMAC supports hardware-based auto-configuration from descriptor
lists. This reduces the number of interrupts required for processing a
DMA transfer. Support that mode in the driver.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
parent 87244fe5
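
For context, a condensed sketch of what the new hardware descriptor list mode does, pieced together from the code added below (struct rcar_dmac_hw_desc, rcar_dmac_alloc_hwdesc() and rcar_dmac_chan_start_xfer()). The helper name and the simplified flow are illustrative only; error handling, 64-bit addresses and the cyclic cases are omitted here and handled in the patch itself.

	/* One hardware descriptor per transfer chunk (16 bytes, as defined below). */
	struct rcar_dmac_hw_desc {
		u32 sar;	/* source address */
		u32 dar;	/* destination address */
		u32 tcr;	/* transfer count */
		u32 reserved;
	} __attribute__((__packed__));

	/*
	 * Hypothetical helper: fill one descriptor per chunk and hand the list
	 * to the DMAC, which then reloads SAR/DAR/TCR by itself at each stage
	 * instead of raising a transfer end interrupt per chunk.
	 */
	static void sketch_start_hwdesc_xfer(struct rcar_dmac_chan *chan,
					     struct rcar_dmac_desc *desc)
	{
		struct rcar_dmac_hw_desc *hwdesc = desc->hwdescs.mem;
		struct rcar_dmac_xfer_chunk *chunk;

		list_for_each_entry(chunk, &desc->chunks, node) {
			hwdesc->sar = chunk->src_addr;
			hwdesc->dar = chunk->dst_addr;
			hwdesc->tcr = chunk->size >> desc->xfer_shift;
			hwdesc++;
		}

		/*
		 * Point the DMAC at the descriptor list and start in normal
		 * descriptor mode with a single completion interrupt.
		 */
		rcar_dmac_chan_write(chan, RCAR_DMADPBASE,
				     (desc->hwdescs.dma & 0xfffffff0) |
				     RCAR_DMADPBASE_SEL);
		rcar_dmac_chan_write(chan, RCAR_DMACHCRB,
				     RCAR_DMACHCRB_DCNT(desc->nchunks - 1) |
				     RCAR_DMACHCRB_DRST);
		rcar_dmac_chan_write(chan, RCAR_DMACHCR, desc->chcr |
				     RCAR_DMACHCR_RPT_SAR | RCAR_DMACHCR_RPT_DAR |
				     RCAR_DMACHCR_RPT_TCR | RCAR_DMACHCR_DPB |
				     RCAR_DMACHCR_DPM_ENABLED | RCAR_DMACHCR_IE |
				     RCAR_DMACHCR_DE);
	}
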
+253 −51
@@ -10,6 +10,7 @@
 * published by the Free Software Foundation.
 */

#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/interrupt.h>
#include <linux/list.h>
@@ -40,6 +41,19 @@ struct rcar_dmac_xfer_chunk {
	u32 size;
};

/*
 * struct rcar_dmac_hw_desc - Hardware descriptor for a transfer chunk
 * @sar: value of the SAR register (source address)
 * @dar: value of the DAR register (destination address)
 * @tcr: value of the TCR register (transfer count)
 */
struct rcar_dmac_hw_desc {
	u32 sar;
	u32 dar;
	u32 tcr;
	u32 reserved;
} __attribute__((__packed__));

/*
 * struct rcar_dmac_desc - R-Car Gen2 DMA Transfer Descriptor
 * @async_tx: base DMA asynchronous transaction descriptor
@@ -49,6 +63,10 @@ struct rcar_dmac_xfer_chunk {
 * @node: entry in the channel's descriptors lists
 * @chunks: list of transfer chunks for this transfer
 * @running: the transfer chunk being currently processed
 * @nchunks: number of transfer chunks for this transfer
 * @hwdescs.mem: hardware descriptors memory for the transfer
 * @hwdescs.dma: device address of the hardware descriptors memory
 * @hwdescs.size: size of the hardware descriptors in bytes
 * @size: transfer size in bytes
 * @cyclic: when set indicates that the DMA transfer is cyclic
 */
@@ -61,6 +79,13 @@ struct rcar_dmac_desc {
	struct list_head node;
	struct list_head chunks;
	struct rcar_dmac_xfer_chunk *running;
	unsigned int nchunks;

	struct {
		struct rcar_dmac_hw_desc *mem;
		dma_addr_t dma;
		size_t size;
	} hwdescs;

	unsigned int size;
	bool cyclic;
@@ -217,7 +242,8 @@ struct rcar_dmac {
#define RCAR_DMATSRB			0x0038
#define RCAR_DMACHCRB			0x001c
#define RCAR_DMACHCRB_DCNT(n)		((n) << 24)
#define RCAR_DMACHCRB_DPTR(n)		((n) << 16)
#define RCAR_DMACHCRB_DPTR_MASK		(0xff << 16)
#define RCAR_DMACHCRB_DPTR_SHIFT	16
#define RCAR_DMACHCRB_DRST		(1 << 15)
#define RCAR_DMACHCRB_DTS		(1 << 8)
#define RCAR_DMACHCRB_SLM_NORMAL	(0 << 4)
@@ -289,6 +315,57 @@ static bool rcar_dmac_chan_is_busy(struct rcar_dmac_chan *chan)
static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
{
	struct rcar_dmac_desc *desc = chan->desc.running;
	u32 chcr = desc->chcr;

	WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan));

	if (chan->mid_rid >= 0)
		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);

	if (desc->hwdescs.mem) {
		dev_dbg(chan->chan.device->dev,
			"chan%u: queue desc %p: %u@%pad\n",
			chan->index, desc, desc->nchunks, &desc->hwdescs.dma);

#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
		rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
				     desc->hwdescs.dma >> 32);
#endif
		rcar_dmac_chan_write(chan, RCAR_DMADPBASE,
				     (desc->hwdescs.dma & 0xfffffff0) |
				     RCAR_DMADPBASE_SEL);
		rcar_dmac_chan_write(chan, RCAR_DMACHCRB,
				     RCAR_DMACHCRB_DCNT(desc->nchunks - 1) |
				     RCAR_DMACHCRB_DRST);

		/*
		 * Program the descriptor stage interrupt to occur after the end
		 * of the first stage.
		 */
		rcar_dmac_chan_write(chan, RCAR_DMADPCR, RCAR_DMADPCR_DIPT(1));

		chcr |= RCAR_DMACHCR_RPT_SAR | RCAR_DMACHCR_RPT_DAR
		     |  RCAR_DMACHCR_RPT_TCR | RCAR_DMACHCR_DPB;

		/*
		 * If the descriptor isn't cyclic enable normal descriptor mode
		 * and the transfer completion interrupt.
		 */
		if (!desc->cyclic)
			chcr |= RCAR_DMACHCR_DPM_ENABLED | RCAR_DMACHCR_IE;
		/*
		 * If the descriptor is cyclic and has a callback enable the
		 * descriptor stage interrupt in infinite repeat mode.
		 */
		else if (desc->async_tx.callback)
			chcr |= RCAR_DMACHCR_DPM_INFINITE | RCAR_DMACHCR_DSIE;
		/*
		 * Otherwise just select infinite repeat mode without any
		 * interrupt.
		 */
		else
			chcr |= RCAR_DMACHCR_DPM_INFINITE;
	} else {
		struct rcar_dmac_xfer_chunk *chunk = desc->running;

		dev_dbg(chan->chan.device->dev,
@@ -296,23 +373,23 @@ static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
			chan->index, chunk, chunk->size, &chunk->src_addr,
			&chunk->dst_addr);

	WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan));

#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
	rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR, chunk->src_addr >> 32);
	rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR, chunk->dst_addr >> 32);
		rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
				     chunk->src_addr >> 32);
		rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
				     chunk->dst_addr >> 32);
#endif
	rcar_dmac_chan_write(chan, RCAR_DMASAR, chunk->src_addr & 0xffffffff);
	rcar_dmac_chan_write(chan, RCAR_DMADAR, chunk->dst_addr & 0xffffffff);

	if (chan->mid_rid >= 0)
		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);

		rcar_dmac_chan_write(chan, RCAR_DMASAR,
				     chunk->src_addr & 0xffffffff);
		rcar_dmac_chan_write(chan, RCAR_DMADAR,
				     chunk->dst_addr & 0xffffffff);
		rcar_dmac_chan_write(chan, RCAR_DMATCR,
				     chunk->size >> desc->xfer_shift);

	rcar_dmac_chan_write(chan, RCAR_DMACHCR, desc->chcr | RCAR_DMACHCR_DE |
			     RCAR_DMACHCR_IE);
		chcr |= RCAR_DMACHCR_DPM_DISABLED | RCAR_DMACHCR_IE;
	}

	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr | RCAR_DMACHCR_DE);
}

static int rcar_dmac_init(struct rcar_dmac *dmac)
@@ -403,31 +480,58 @@ static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp)
 * @desc: the descriptor
 *
 * Put the descriptor and its transfer chunk descriptors back in the channel's
 * free descriptors lists. The descriptor's chunk will be reinitialized to an
 * empty list as a result.
 * free descriptors lists, and free the hardware descriptors list memory. The
 * descriptor's chunks list will be reinitialized to an empty list as a result.
 *
 * The descriptor must have been removed from the channel's done list before
 * calling this function.
 * The descriptor must have been removed from the channel's lists before calling
 * this function.
 *
 * Locking: Must be called with the channel lock held.
 * Locking: Must be called in non-atomic context.
 */
static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan,
			       struct rcar_dmac_desc *desc)
{
	if (desc->hwdescs.mem) {
		dma_free_coherent(NULL, desc->hwdescs.size, desc->hwdescs.mem,
				  desc->hwdescs.dma);
		desc->hwdescs.mem = NULL;
	}

	spin_lock_irq(&chan->lock);
	list_splice_tail_init(&desc->chunks, &chan->desc.chunks_free);
	list_add_tail(&desc->node, &chan->desc.free);
	spin_unlock_irq(&chan->lock);
}

static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan)
{
	struct rcar_dmac_desc *desc, *_desc;
	LIST_HEAD(list);

	/*
	 * We have to temporarily move all descriptors from the wait list to a
	 * local list as iterating over the wait list, even with
	 * list_for_each_entry_safe, isn't safe if we release the channel lock
	 * around the rcar_dmac_desc_put() call.
	 */
	spin_lock_irq(&chan->lock);
	list_splice_init(&chan->desc.wait, &list);
	spin_unlock_irq(&chan->lock);

	list_for_each_entry_safe(desc, _desc, &chan->desc.wait, node) {
	list_for_each_entry_safe(desc, _desc, &list, node) {
		if (async_tx_test_ack(&desc->async_tx)) {
			list_del(&desc->node);
			rcar_dmac_desc_put(chan, desc);
		}
	}

	if (list_empty(&list))
		return;

	/* Put the remaining descriptors back in the wait list. */
	spin_lock_irq(&chan->lock);
	list_splice(&list, &chan->desc.wait);
	spin_unlock_irq(&chan->lock);
}

/*
@@ -444,11 +548,11 @@ static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan)
	struct rcar_dmac_desc *desc;
	int ret;

	spin_lock_irq(&chan->lock);

	/* Recycle acked descriptors before attempting allocation. */
	rcar_dmac_desc_recycle_acked(chan);

	spin_lock_irq(&chan->lock);

	do {
		if (list_empty(&chan->desc.free)) {
			/*
@@ -547,6 +651,28 @@ rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan)
	return chunk;
}

static void rcar_dmac_alloc_hwdesc(struct rcar_dmac_chan *chan,
				   struct rcar_dmac_desc *desc)
{
	struct rcar_dmac_xfer_chunk *chunk;
	struct rcar_dmac_hw_desc *hwdesc;
	size_t size = desc->nchunks * sizeof(*hwdesc);

	hwdesc = dma_alloc_coherent(NULL, size, &desc->hwdescs.dma, GFP_NOWAIT);
	if (!hwdesc)
		return;

	desc->hwdescs.mem = hwdesc;
	desc->hwdescs.size = size;

	list_for_each_entry(chunk, &desc->chunks, node) {
		hwdesc->sar = chunk->src_addr;
		hwdesc->dar = chunk->dst_addr;
		hwdesc->tcr = chunk->size >> desc->xfer_shift;
		hwdesc++;
	}
}

/* -----------------------------------------------------------------------------
 * Stop and reset
 */
@@ -555,7 +681,8 @@ static void rcar_dmac_chan_halt(struct rcar_dmac_chan *chan)
{
	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);

	chcr &= ~(RCAR_DMACHCR_IE | RCAR_DMACHCR_TE | RCAR_DMACHCR_DE);
	chcr &= ~(RCAR_DMACHCR_DSE | RCAR_DMACHCR_DSIE | RCAR_DMACHCR_IE |
		  RCAR_DMACHCR_TE | RCAR_DMACHCR_DE);
	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr);
}

@@ -666,8 +793,10 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
	struct rcar_dmac_xfer_chunk *chunk;
	struct rcar_dmac_desc *desc;
	struct scatterlist *sg;
	unsigned int nchunks = 0;
	unsigned int max_chunk_size;
	unsigned int full_size = 0;
	bool highmem = false;
	unsigned int i;

	desc = rcar_dmac_desc_get(chan);
@@ -706,6 +835,14 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
				size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32)
				size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;

			/*
			 * Check if either of the source or destination address
			 * can't be expressed in 32 bits. If so we can't use
			 * hardware descriptor lists.
			 */
			if (dev_addr >> 32 || mem_addr >> 32)
				highmem = true;
#endif

			chunk = rcar_dmac_xfer_chunk_get(chan);
@@ -736,11 +873,26 @@ rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
			len -= size;

			list_add_tail(&chunk->node, &desc->chunks);
			nchunks++;
		}
	}

	desc->nchunks = nchunks;
	desc->size = full_size;

	/*
	 * Use hardware descriptor lists if possible when more than one chunk
	 * needs to be transferred (otherwise they don't make much sense).
	 *
	 * The highmem check currently covers the whole transfer. As an
	 * optimization we could use descriptor lists for consecutive lowmem
	 * chunks and direct manual mode for highmem chunks. Whether the
	 * performance improvement would be significant enough compared to the
	 * additional complexity remains to be investigated.
	 */
	if (!highmem && nchunks > 1)
		rcar_dmac_alloc_hwdesc(chan, desc);

	return &desc->async_tx;
}

@@ -940,8 +1092,10 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
					       dma_cookie_t cookie)
{
	struct rcar_dmac_desc *desc = chan->desc.running;
	struct rcar_dmac_xfer_chunk *running = NULL;
	struct rcar_dmac_xfer_chunk *chunk;
	unsigned int residue = 0;
	unsigned int dptr = 0;

	if (!desc)
		return 0;
@@ -954,9 +1108,23 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
	if (cookie != desc->async_tx.cookie)
		return desc->size;

	/*
	 * In descriptor mode the descriptor running pointer is not maintained
	 * by the interrupt handler, find the running descriptor from the
	 * descriptor pointer field in the CHCRB register. In non-descriptor
	 * mode just use the running descriptor pointer.
	 */
	if (desc->hwdescs.mem) {
		dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
			RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
		WARN_ON(dptr >= desc->nchunks);
	} else {
		running = desc->running;
	}

	/* Compute the size of all chunks still to be transferred. */
	list_for_each_entry_reverse(chunk, &desc->chunks, node) {
		if (chunk == desc->running)
		if (chunk == running || ++dptr == desc->nchunks)
			break;

		residue += chunk->size;
@@ -1025,43 +1193,72 @@ done:
 * IRQ handling
 */

static irqreturn_t rcar_dmac_isr_desc_stage_end(struct rcar_dmac_chan *chan)
{
	struct rcar_dmac_desc *desc = chan->desc.running;
	unsigned int stage;

	if (WARN_ON(!desc || !desc->cyclic)) {
		/*
		 * This should never happen, there should always be a running
		 * cyclic descriptor when a descriptor stage end interrupt is
		 * triggered. Warn and return.
		 */
		return IRQ_NONE;
	}

	/* Program the interrupt pointer to the next stage. */
	stage = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
		 RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
	rcar_dmac_chan_write(chan, RCAR_DMADPCR, RCAR_DMADPCR_DIPT(stage));

	return IRQ_WAKE_THREAD;
}

static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan *chan)
{
	struct rcar_dmac_desc *desc = chan->desc.running;
	struct rcar_dmac_xfer_chunk *chunk;
	irqreturn_t ret = IRQ_WAKE_THREAD;

	if (WARN_ON_ONCE(!desc)) {
		/*
		 * This should never happen, there should always be
		 * a running descriptor when a transfer ends. Warn and
		 * return.
		 * This should never happen, there should always be a running
		 * descriptor when a transfer end interrupt is triggered. Warn
		 * and return.
		 */
		return IRQ_NONE;
	}

	/*
	 * If we haven't completed the last transfer chunk simply move to the
	 * next one. Only wake the IRQ thread if the transfer is cyclic.
	 * The transfer end interrupt isn't generated for each chunk when using
	 * descriptor mode. Only update the running chunk pointer in
	 * non-descriptor mode.
	 */
	if (!desc->hwdescs.mem) {
		/*
		 * If we haven't completed the last transfer chunk simply move
		 * to the next one. Only wake the IRQ thread if the transfer is
		 * cyclic.
		 */
	chunk = desc->running;
	if (!list_is_last(&chunk->node, &desc->chunks)) {
		desc->running = list_next_entry(chunk, node);
		if (!list_is_last(&desc->running->node, &desc->chunks)) {
			desc->running = list_next_entry(desc->running, node);
			if (!desc->cyclic)
				ret = IRQ_HANDLED;
			goto done;
		}

		/*
	 * We've completed the last transfer chunk. If the transfer is cyclic,
	 * move back to the first one.
		 * We've completed the last transfer chunk. If the transfer is
		 * cyclic, move back to the first one.
		 */
		if (desc->cyclic) {
		desc->running = list_first_entry(&desc->chunks,
			desc->running =
				list_first_entry(&desc->chunks,
						 struct rcar_dmac_xfer_chunk,
						 node);
			goto done;
		}
	}

	/* The descriptor is complete, move it to the done list. */
	list_move_tail(&desc->node, &chan->desc.done);
@@ -1083,6 +1280,7 @@ done:

static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
{
	u32 mask = RCAR_DMACHCR_DSE | RCAR_DMACHCR_TE;
	struct rcar_dmac_chan *chan = dev;
	irqreturn_t ret = IRQ_NONE;
	u32 chcr;
@@ -1090,8 +1288,12 @@ static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
	spin_lock(&chan->lock);

	chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
	rcar_dmac_chan_write(chan, RCAR_DMACHCR,
			     chcr & ~(RCAR_DMACHCR_TE | RCAR_DMACHCR_DE));
	if (chcr & RCAR_DMACHCR_TE)
		mask |= RCAR_DMACHCR_DE;
	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr & ~mask);

	if (chcr & RCAR_DMACHCR_DSE)
		ret |= rcar_dmac_isr_desc_stage_end(chan);

	if (chcr & RCAR_DMACHCR_TE)
		ret |= rcar_dmac_isr_transfer_end(chan);
@@ -1148,11 +1350,11 @@ static irqreturn_t rcar_dmac_isr_channel_thread(int irq, void *dev)
		list_add_tail(&desc->node, &chan->desc.wait);
	}

	spin_unlock_irq(&chan->lock);

	/* Recycle all acked descriptors. */
	rcar_dmac_desc_recycle_acked(chan);

	spin_unlock_irq(&chan->lock);

	return IRQ_HANDLED;
}
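
For reference, the residue computation in descriptor mode can be read as the following standalone sketch, restating the logic added to rcar_dmac_chan_get_residue() above; the helper name is hypothetical and the final DMATCR read comes from the pre-existing part of that function.

	/*
	 * In descriptor mode the interrupt handler no longer tracks the running
	 * chunk, so the current stage is read back from the DPTR field of CHCRB.
	 */
	static unsigned int sketch_residue_desc_mode(struct rcar_dmac_chan *chan,
						     struct rcar_dmac_desc *desc)
	{
		struct rcar_dmac_xfer_chunk *chunk;
		unsigned int residue = 0;
		unsigned int dptr;

		dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
			RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;

		/* Sum the chunks that have not been started yet... */
		list_for_each_entry_reverse(chunk, &desc->chunks, node) {
			if (++dptr == desc->nchunks)
				break;
			residue += chunk->size;
		}

		/* ...and add what is left of the chunk currently in flight. */
		return residue +
		       (rcar_dmac_chan_read(chan, RCAR_DMATCR) << desc->xfer_shift);
	}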