Commit 4c532b14 authored by David S. Miller

Merge branch 'net-napi-addition-of-napi_defer_hard_irqs'



Eric Dumazet says:

====================
net: napi: addition of napi_defer_hard_irqs

This patch series augments the gro_flush_timeout feature with napi_defer_hard_irqs.

As extensively described in the first patch changelog, this can suppress
the chit-chat traffic between the NIC and the host used to signal interrupts
and re-arm them, which can be an issue on high-speed NICs with many queues.

The last patch in this series converts the mlx4 TX completion path to
napi_complete_done(), enabling this new mechanism.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents e6acd2b6 cf4058db
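For context before the diffs: the deferral only engages for drivers that report
their work count via napi_complete_done() and honor its return value. Below is a
minimal sketch of that poll-loop contract, assuming hypothetical driver helpers
my_clean_queue() and my_arm_irq() (only the NAPI calls are real kernel API):

#include <linux/netdevice.h>

static int my_napi_poll(struct napi_struct *napi, int budget)
{
	/* my_clean_queue() stands in for the driver's completion
	 * processing; it returns how many items it handled.
	 */
	int work_done = my_clean_queue(napi, budget);

	if (work_done >= budget)
		return budget;	/* more work pending: stay scheduled */

	/* napi_complete_done() returns false when the core defers the
	 * hard IRQ (counter/timer); the device IRQ must then stay
	 * masked, and the hrtimer will re-schedule this poll.
	 */
	if (napi_complete_done(napi, work_done))
		my_arm_irq(napi);

	return work_done;
}

This is exactly the shape the mlx4 TX poll function is converted to in the
second diff below.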
+1 −1
@@ -946,7 +946,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 		xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
 		if (xdp_tx_cq->xdp_busy) {
 			clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq,
-							       budget);
+							       budget) < budget;
 			xdp_tx_cq->xdp_busy = !clean_complete;
 		}
 	}
+10 −10
@@ -382,7 +382,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
 	return cnt;
 }
 
-bool mlx4_en_process_tx_cq(struct net_device *dev,
-			   struct mlx4_en_cq *cq, int napi_budget)
+int mlx4_en_process_tx_cq(struct net_device *dev,
+			  struct mlx4_en_cq *cq, int napi_budget)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -405,7 +405,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
 	u32 ring_cons;
 
 	if (unlikely(!priv->port_up))
-		return true;
+		return 0;
 
 	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
 
@@ -480,7 +480,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
 	WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
 
 	if (cq->type == TX_XDP)
-		return done < budget;
+		return done;
 
 	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
 
@@ -492,7 +492,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
 		ring->wake_queue++;
 	}
 
-	return done < budget;
+	return done;
 }
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq)
@@ -512,13 +512,13 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
 	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
 	struct net_device *dev = cq->dev;
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	bool clean_complete;
+	int work_done;
 
-	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
-	if (!clean_complete)
+	work_done = mlx4_en_process_tx_cq(dev, cq, budget);
+	if (work_done >= budget)
 		return budget;
 
-	napi_complete(napi);
-	mlx4_en_arm_cq(priv, cq);
+	if (napi_complete_done(napi, work_done))
+		mlx4_en_arm_cq(priv, cq);
 
 	return 0;
+2 −2
@@ -737,7 +737,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
 			  int budget);
 int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
 int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
-bool mlx4_en_process_tx_cq(struct net_device *dev,
-			   struct mlx4_en_cq *cq, int napi_budget);
+int mlx4_en_process_tx_cq(struct net_device *dev,
+			  struct mlx4_en_cq *cq, int napi_budget);
 u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 			 struct mlx4_en_tx_ring *ring,
+2 −0
@@ -329,6 +329,7 @@ struct napi_struct {
 
 	unsigned long		state;
 	int			weight;
+	int			defer_hard_irqs_count;
 	unsigned long		gro_bitmask;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
@@ -1995,6 +1996,7 @@ struct net_device {
 
 	struct bpf_prog __rcu	*xdp_prog;
 	unsigned long		gro_flush_timeout;
+	int			napi_defer_hard_irqs;
 	rx_handler_func_t __rcu	*rx_handler;
 	void __rcu		*rx_handler_data;
 
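The two new fields pair up: net_device::napi_defer_hard_irqs is the per-device
knob, and napi_struct::defer_hard_irqs_count is the per-NAPI countdown reloaded
from it whenever a poll made progress. Assuming the sysfs attributes added
elsewhere in this series, a hedged userspace sketch of enabling the feature
(the interface name and values are illustrative):

#include <stdio.h>

static int write_sysfs(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* Defer up to 2 hard IRQs per NAPI instance; re-poll on a
	 * 20 usec timer instead of re-arming the interrupt.
	 */
	write_sysfs("/sys/class/net/eth0/napi_defer_hard_irqs", "2");
	write_sysfs("/sys/class/net/eth0/gro_flush_timeout", "20000");
	return 0;
}

Note that gro_flush_timeout must be nonzero for the deferral to take effect;
with it at zero, napi_complete_done() behaves as before.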
+18 −11
@@ -6227,7 +6227,8 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
 
 bool napi_complete_done(struct napi_struct *n, int work_done)
 {
-	unsigned long flags, val, new;
+	unsigned long flags, val, new, timeout = 0;
+	bool ret = true;
 
 	/*
 	 * 1) Don't let napi dequeue from the cpu poll list
@@ -6239,20 +6240,23 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
+	if (work_done) {
+		if (n->gro_bitmask)
+			timeout = READ_ONCE(n->dev->gro_flush_timeout);
+		n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
+	}
+	if (n->defer_hard_irqs_count > 0) {
+		n->defer_hard_irqs_count--;
+		timeout = READ_ONCE(n->dev->gro_flush_timeout);
+		if (timeout)
+			ret = false;
+	}
 	if (n->gro_bitmask) {
-		unsigned long timeout = 0;
-
-		if (work_done)
-			timeout = n->dev->gro_flush_timeout;
-
 		/* When the NAPI instance uses a timeout and keeps postponing
 		 * it, we need to bound somehow the time packets are kept in
 		 * the GRO layer
 		 */
 		napi_gro_flush(n, !!timeout);
-		if (timeout)
-			hrtimer_start(&n->timer, ns_to_ktime(timeout),
-				      HRTIMER_MODE_REL_PINNED);
 	}
 
 	gro_normal_list(n);
@@ -6284,7 +6288,10 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 		return false;
 	}
 
-	return true;
+	if (timeout)
+		hrtimer_start(&n->timer, ns_to_ktime(timeout),
+			      HRTIMER_MODE_REL_PINNED);
+	return ret;
 }
 EXPORT_SYMBOL(napi_complete_done);
 
@@ -6464,7 +6471,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_bitmask && !napi_disable_pending(napi) &&
+	if (!napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
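Taken together: after a poll that did any work, defer_hard_irqs_count is
reloaded from the device knob; as long as it is nonzero and gro_flush_timeout
is set, napi_complete_done() returns false, the driver leaves the device IRQ
masked, and the hrtimer re-schedules the poll. With the example values above
(2 and 20000 ns), a traffic burst is followed by up to two timer-driven polls
roughly 20 usec apart before the hard IRQ is finally re-armed. The
napi_watchdog() change supports this: the timer must now fire even when
gro_bitmask is empty, since it may be standing in for a deferred IRQ rather
than bounding GRO latency.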