Commit fdb89a31 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'net-stmmac-Performance-improvements-in-Multi-Queue'



Jose Abreu says:

====================
net: stmmac: Performance improvements in Multi-Queue

Tested in XGMAC2 and GMAC5.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents d0e698d5 ae9f346d
Loading
Loading
Loading
Loading
+7 −17
Original line number Diff line number Diff line
@@ -124,9 +124,9 @@ void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan)
int dwmac4_dma_interrupt(void __iomem *ioaddr,
			 struct stmmac_extra_stats *x, u32 chan)
{
	int ret = 0;

	u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan));
	u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
	int ret = 0;

	/* ABNORMAL interrupts */
	if (unlikely(intr_status & DMA_CHAN_STATUS_AIS)) {
@@ -151,16 +151,11 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
	if (likely(intr_status & DMA_CHAN_STATUS_NIS)) {
		x->normal_irq_n++;
		if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
			u32 value;

			value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
			/* to schedule NAPI on real RIE event. */
			if (likely(value & DMA_CHAN_INTR_ENA_RIE)) {
			x->rx_normal_irq_n++;
			ret |= handle_rx;
		}
		}
		if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
		if (likely(intr_status & (DMA_CHAN_STATUS_TI |
					  DMA_CHAN_STATUS_TBU))) {
			x->tx_normal_irq_n++;
			ret |= handle_tx;
		}
@@ -168,12 +163,7 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
			x->rx_early_irq++;
	}

	/* Clear the interrupt by writing a logic 1 to the chanX interrupt
	 * status [21-0] expect reserved bits [5-3]
	 */
	writel((intr_status & 0x3fffc7),
	       ioaddr + DMA_CHAN_STATUS(chan));

	writel(intr_status & intr_en, ioaddr + DMA_CHAN_STATUS(chan));
	return ret;
}

+3 −1
Original line number Diff line number Diff line
@@ -193,9 +193,10 @@
#define XGMAC_AIE			BIT(14)
#define XGMAC_RBUE			BIT(7)
#define XGMAC_RIE			BIT(6)
#define XGMAC_TBUE			BIT(2)
#define XGMAC_TIE			BIT(0)
#define XGMAC_DMA_INT_DEFAULT_EN	(XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \
					XGMAC_RIE | XGMAC_TIE)
					XGMAC_RIE | XGMAC_TBUE | XGMAC_TIE)
#define XGMAC_DMA_CH_Rx_WATCHDOG(x)	(0x0000313c + (0x80 * (x)))
#define XGMAC_RWT			GENMASK(7, 0)
#define XGMAC_DMA_CH_STATUS(x)		(0x00003160 + (0x80 * (x)))
@@ -204,6 +205,7 @@
#define XGMAC_FBE			BIT(12)
#define XGMAC_RBU			BIT(7)
#define XGMAC_RI			BIT(6)
#define XGMAC_TBU			BIT(2)
#define XGMAC_TPS			BIT(1)
#define XGMAC_TI			BIT(0)

+3 −5
Original line number Diff line number Diff line
@@ -283,12 +283,10 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr,
		x->normal_irq_n++;

		if (likely(intr_status & XGMAC_RI)) {
			if (likely(intr_en & XGMAC_RIE)) {
			x->rx_normal_irq_n++;
			ret |= handle_rx;
		}
		}
		if (likely(intr_status & XGMAC_TI)) {
		if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
			x->tx_normal_irq_n++;
			ret |= handle_tx;
		}
+2 −3
Original line number Diff line number Diff line
@@ -79,11 +79,10 @@ struct stmmac_rx_queue {
};

struct stmmac_channel {
	struct napi_struct napi ____cacheline_aligned_in_smp;
	struct napi_struct rx_napi ____cacheline_aligned_in_smp;
	struct napi_struct tx_napi ____cacheline_aligned_in_smp;
	struct stmmac_priv *priv_data;
	u32 index;
	int has_rx;
	int has_tx;
};

struct stmmac_tc_entry {
+66 −49
Original line number Diff line number Diff line
@@ -155,7 +155,10 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
	for (queue = 0; queue < maxq; queue++) {
		struct stmmac_channel *ch = &priv->channel[queue];

		napi_disable(&ch->napi);
		if (queue < rx_queues_cnt)
			napi_disable(&ch->rx_napi);
		if (queue < tx_queues_cnt)
			napi_disable(&ch->tx_napi);
	}
}

@@ -173,7 +176,10 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv)
	for (queue = 0; queue < maxq; queue++) {
		struct stmmac_channel *ch = &priv->channel[queue];

		napi_enable(&ch->napi);
		if (queue < rx_queues_cnt)
			napi_enable(&ch->rx_napi);
		if (queue < tx_queues_cnt)
			napi_enable(&ch->tx_napi);
	}
}

@@ -1955,6 +1961,10 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
	}

	/* We still have pending packets, let's call for a new scheduling */
	if (tx_q->dirty_tx != tx_q->cur_tx)
		mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));

	__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));

	return count;
@@ -2045,23 +2055,15 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
	int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
						 &priv->xstats, chan);
	struct stmmac_channel *ch = &priv->channel[chan];
	bool needs_work = false;

	if ((status & handle_rx) && ch->has_rx) {
		needs_work = true;
	} else {
		status &= ~handle_rx;
	}

	if ((status & handle_tx) && ch->has_tx) {
		needs_work = true;
	} else {
		status &= ~handle_tx;
	if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
		stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
		napi_schedule_irqoff(&ch->rx_napi);
	}

	if (needs_work && napi_schedule_prep(&ch->napi)) {
	if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
		stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
		__napi_schedule(&ch->napi);
		napi_schedule_irqoff(&ch->tx_napi);
	}

	return status;
@@ -2257,8 +2259,14 @@ static void stmmac_tx_timer(struct timer_list *t)

	ch = &priv->channel[tx_q->queue_index];

	if (likely(napi_schedule_prep(&ch->napi)))
		__napi_schedule(&ch->napi);
	/*
	 * If NAPI is already running we can miss some events. Let's rearm
	 * the timer and try again.
	 */
	if (likely(napi_schedule_prep(&ch->tx_napi)))
		__napi_schedule(&ch->tx_napi);
	else
		mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
}

/**
@@ -3514,7 +3522,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
			else
				skb->ip_summed = CHECKSUM_UNNECESSARY;

			napi_gro_receive(&ch->napi, skb);
			napi_gro_receive(&ch->rx_napi, skb);

			priv->dev->stats.rx_packets++;
			priv->dev->stats.rx_bytes += frame_len;
@@ -3529,40 +3537,45 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
	return count;
}

/**
 *  stmmac_poll - stmmac poll method (NAPI)
 *  @napi : pointer to the napi structure.
 *  @budget : maximum number of packets that the current CPU can receive from
 *	      all interfaces.
 *  Description :
 *  To look at the incoming frames and clear the tx resources.
 */
static int stmmac_napi_poll(struct napi_struct *napi, int budget)
static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
{
	struct stmmac_channel *ch =
		container_of(napi, struct stmmac_channel, napi);
		container_of(napi, struct stmmac_channel, rx_napi);
	struct stmmac_priv *priv = ch->priv_data;
	int work_done, rx_done = 0, tx_done = 0;
	u32 chan = ch->index;
	int work_done;

	priv->xstats.napi_poll++;

	if (ch->has_tx)
		tx_done = stmmac_tx_clean(priv, budget, chan);
	if (ch->has_rx)
		rx_done = stmmac_rx(priv, budget, chan);
	work_done = stmmac_rx(priv, budget, chan);
	if (work_done < budget && napi_complete_done(napi, work_done))
		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
	return work_done;
}

	work_done = max(rx_done, tx_done);
	work_done = min(work_done, budget);
static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
{
	struct stmmac_channel *ch =
		container_of(napi, struct stmmac_channel, tx_napi);
	struct stmmac_priv *priv = ch->priv_data;
	struct stmmac_tx_queue *tx_q;
	u32 chan = ch->index;
	int work_done;

	if (work_done < budget && napi_complete_done(napi, work_done)) {
		int stat;
	priv->xstats.napi_poll++;

	work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
	work_done = min(work_done, budget);

	if (work_done < budget && napi_complete_done(napi, work_done))
		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
		stat = stmmac_dma_interrupt_status(priv, priv->ioaddr,
						   &priv->xstats, chan);
		if (stat && napi_reschedule(napi))
			stmmac_disable_dma_irq(priv, priv->ioaddr, chan);

	/* Force transmission restart */
	tx_q = &priv->tx_queue[chan];
	if (tx_q->cur_tx != tx_q->dirty_tx) {
		stmmac_enable_dma_transmission(priv, priv->ioaddr);
		stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr,
				       chan);
	}

	return work_done;
@@ -4342,14 +4355,15 @@ int stmmac_dvr_probe(struct device *device,
		ch->priv_data = priv;
		ch->index = queue;

		if (queue < priv->plat->rx_queues_to_use)
			ch->has_rx = true;
		if (queue < priv->plat->tx_queues_to_use)
			ch->has_tx = true;

		netif_napi_add(ndev, &ch->napi, stmmac_napi_poll,
		if (queue < priv->plat->rx_queues_to_use) {
			netif_napi_add(ndev, &ch->rx_napi, stmmac_napi_poll_rx,
				       NAPI_POLL_WEIGHT);
		}
		if (queue < priv->plat->tx_queues_to_use) {
			netif_napi_add(ndev, &ch->tx_napi, stmmac_napi_poll_tx,
				       NAPI_POLL_WEIGHT);
		}
	}

	mutex_init(&priv->lock);

@@ -4404,7 +4418,10 @@ error_mdio_register:
	for (queue = 0; queue < maxq; queue++) {
		struct stmmac_channel *ch = &priv->channel[queue];

		netif_napi_del(&ch->napi);
		if (queue < priv->plat->rx_queues_to_use)
			netif_napi_del(&ch->rx_napi);
		if (queue < priv->plat->tx_queues_to_use)
			netif_napi_del(&ch->tx_napi);
	}
error_hw_init:
	destroy_workqueue(priv->wq);