Commit ebf8c9aa authored by Yevgeny Petrilin's avatar Yevgeny Petrilin Committed by David S. Miller
Browse files

net/mlx4_en: Saving mem access on data path



Localized the pdev->dev, and using dma_map instead of pci_map
There are multiple map/unmap operations on data path,
optimizing those by saving redundant pointer access.
Those places were identified as hot-spots when running kernel profiling
during some benchmarks.
The fixes had most impact when testing packet rate with small packets,
reducing several % from CPU load, and in some case being the difference
between reaching wire speed or being CPU bound.

Signed-off-by: default avatarYevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 6975f4ce
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1062,6 +1062,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
	memset(priv, 0, sizeof(struct mlx4_en_priv));
	priv->dev = dev;
	priv->mdev = mdev;
	priv->ddev = &mdev->pdev->dev;
	priv->prof = prof;
	priv->port = port;
	priv->port_up = false;
+5 −9
Original line number Diff line number Diff line
@@ -48,7 +48,6 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_alloc *ring_alloc,
			      int i)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
	struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
	struct page *page;
@@ -72,7 +71,7 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
		skb_frags[i].offset = page_alloc->offset;
		page_alloc->offset += frag_info->frag_stride;
	}
	dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
	dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) +
			     skb_frags[i].offset, frag_info->frag_size,
			     PCI_DMA_FROMDEVICE);
	rx_desc->data[i].addr = cpu_to_be64(dma);
@@ -186,7 +185,6 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring,
				 int index)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct page_frag *skb_frags;
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
	dma_addr_t dma;
@@ -198,7 +196,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
		dma = be64_to_cpu(rx_desc->data[nr].addr);

		en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
		dma_unmap_single(priv->ddev, dma, skb_frags[nr].size,
				 PCI_DMA_FROMDEVICE);
		put_page(skb_frags[nr].page);
	}
@@ -412,7 +410,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
				    int length)
{
	struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info;
	int nr;
	dma_addr_t dma;
@@ -435,7 +432,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
			goto fail;

		/* Unmap buffer */
		pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]),
		dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]),
				 PCI_DMA_FROMDEVICE);
	}
	/* Adjust size of last fragment to match actual length */
@@ -461,7 +458,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_alloc *page_alloc,
				      unsigned int length)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct sk_buff *skb;
	void *va;
	int used_frags;
@@ -483,10 +479,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
		/* We are copying all relevant data to the skb - temporarily
		 * synch buffers for the copy */
		dma = be64_to_cpu(rx_desc->data[0].addr);
		dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length,
		dma_sync_single_for_cpu(priv->ddev, dma, length,
					DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		dma_sync_single_for_device(&mdev->pdev->dev, dma, length,
		dma_sync_single_for_device(priv->ddev, dma, length,
					   DMA_FROM_DEVICE);
		skb->tail += length;
	} else {
+6 −7
Original line number Diff line number Diff line
@@ -198,7 +198,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
				struct mlx4_en_tx_ring *ring,
				int index, u8 owner)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
@@ -214,7 +213,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
		if (!tx_info->inl) {
			if (tx_info->linear) {
				pci_unmap_single(mdev->pdev,
				dma_unmap_single(priv->ddev,
					(dma_addr_t) be64_to_cpu(data->addr),
					 be32_to_cpu(data->byte_count),
					 PCI_DMA_TODEVICE);
@@ -223,7 +222,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,

			for (i = 0; i < frags; i++) {
				frag = &skb_shinfo(skb)->frags[i];
				pci_unmap_page(mdev->pdev,
				dma_unmap_page(priv->ddev,
					(dma_addr_t) be64_to_cpu(data[i].addr),
					skb_frag_size(frag), PCI_DMA_TODEVICE);
			}
@@ -241,7 +240,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
			}

			if (tx_info->linear) {
				pci_unmap_single(mdev->pdev,
				dma_unmap_single(priv->ddev,
					(dma_addr_t) be64_to_cpu(data->addr),
					 be32_to_cpu(data->byte_count),
					 PCI_DMA_TODEVICE);
@@ -253,7 +252,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
				if ((void *) data >= end)
					data = ring->buf;
				frag = &skb_shinfo(skb)->frags[i];
				pci_unmap_page(mdev->pdev,
				dma_unmap_page(priv->ddev,
					(dma_addr_t) be64_to_cpu(data->addr),
					 skb_frag_size(frag), PCI_DMA_TODEVICE);
				++data;
@@ -733,7 +732,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
		/* Map fragments */
		for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
			frag = &skb_shinfo(skb)->frags[i];
			dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag,
			dma = skb_frag_dma_map(priv->ddev, frag,
					       0, skb_frag_size(frag),
					       DMA_TO_DEVICE);
			data->addr = cpu_to_be64(dma);
@@ -745,7 +744,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)

		/* Map linear part */
		if (tx_info->linear) {
			dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size,
			dma = dma_map_single(priv->ddev, skb->data + lso_header_size,
					     skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
			data->addr = cpu_to_be64(dma);
			data->lkey = cpu_to_be32(mdev->mr.key);
+1 −0
Original line number Diff line number Diff line
@@ -482,6 +482,7 @@ struct mlx4_en_priv {
	struct mlx4_en_stat_out_mbox hw_stats;
	int vids[128];
	bool wol;
	struct device *ddev;
};

enum mlx4_en_wol {