Commit 65124bbf authored by Jesse Brandeburg, committed by Jeff Kirsher
Browse files

ice: Reorganize tx_buf and ring structs



Use more efficient structure ordering by using the pahole tool
and a lot of code inspection to get hot cache lines to have
packed data (no holes if possible) and adjacent warm data.

ice_ring prior to this change:
  /* size: 192, cachelines: 3, members: 23 */
  /* sum members: 158, holes: 4, sum holes: 12 */
  /* padding: 22 */

ice_ring after this change:
  /* size: 192, cachelines: 3, members: 25 */
  /* sum members: 162, holes: 1, sum holes: 1 */
  /* padding: 29 */

ice_tx_buf prior to this change:
  /* size: 48, cachelines: 1, members: 7 */
  /* sum members: 38, holes: 2, sum holes: 6 */
  /* padding: 4 */
  /* last cacheline: 48 bytes */

ice_tx_buf after this change:
  /* size: 40, cachelines: 1, members: 7 */
  /* sum members: 38, holes: 1, sum holes: 2 */
  /* last cacheline: 40 bytes */

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 55e062ba
Loading
Loading
Loading
Loading
+21 −14
Original line number Diff line number Diff line
@@ -58,19 +58,19 @@ struct ice_tx_buf {
	unsigned int bytecount;
	unsigned short gso_segs;
	u32 tx_flags;
-	DEFINE_DMA_UNMAP_ADDR(dma);
 	DEFINE_DMA_UNMAP_LEN(len);
+	DEFINE_DMA_UNMAP_ADDR(dma);
};

/* Parameters gathered for building Tx descriptors (td_* fields) and the
 * Tx context descriptor (cd_* fields).
 *
 * NOTE: the diff extraction had merged the pre- and post-commit member
 * lists, leaving duplicate members; this is the deduplicated post-commit
 * layout. Members are ordered largest-first (u64/pointer, then u32, u16,
 * u8) so the struct packs without internal padding holes, matching the
 * pahole-driven reordering described in the commit message.
 */
struct ice_tx_offload_params {
	u64 cd_qw1;		/* context descriptor qword 1 */
	struct ice_ring *tx_ring;	/* ring these descriptors target */
	u32 td_cmd;		/* Tx descriptor command bits */
	u32 td_offset;		/* Tx descriptor offset field */
	u32 td_l2tag1;		/* Tx descriptor L2 tag 1 */
	u32 cd_tunnel_params;	/* context descriptor tunneling params */
	u16 cd_l2tag2;		/* context descriptor L2 tag 2 */
	u8 header_len;		/* header length — presumably for TSO; confirm against users */
};

struct ice_rx_buf {
@@ -150,6 +150,7 @@ enum ice_rx_dtype {

/* descriptor ring, associated with a VSI */
struct ice_ring {
	/* CL1 - 1st cacheline starts here */
	struct ice_ring *next;		/* pointer to next ring in q_vector */
	void *desc;			/* Descriptor ring memory */
	struct device *dev;		/* Used for DMA mapping */
@@ -161,11 +162,11 @@ struct ice_ring {
		struct ice_tx_buf *tx_buf;
		struct ice_rx_buf *rx_buf;
	};
+	/* CL2 - 2nd cacheline starts here */
 	u16 q_index;			/* Queue number of ring */
-	u32 txq_teid;			/* Added Tx queue TEID */
-#ifdef CONFIG_DCB
-	u8 dcb_tc;		/* Traffic class of ring */
-#endif /* CONFIG_DCB */
 	u16 q_handle;			/* Queue handle per TC */

+	u8 ring_active;			/* is ring online or not */

	u16 count;			/* Number of descriptors */
	u16 reg_idx;			/* HW register index of the ring */
@@ -173,8 +174,7 @@ struct ice_ring {
	/* used in interrupt processing */
	u16 next_to_use;
	u16 next_to_clean;
 
-	u8 ring_active;			/* is ring online or not */
+	u16 next_to_alloc;

	/* stats structs */
	struct ice_q_stats	stats;
@@ -184,10 +184,17 @@ struct ice_ring {
		struct ice_rxq_stats rx_stats;
	};

-	unsigned int size;		/* length of descriptor ring in bytes */
-	dma_addr_t dma;			/* physical address of ring */
 	struct rcu_head rcu;		/* to avoid race on free */
-	u16 next_to_alloc;
+	/* CLX - the below items are only accessed infrequently and should be
+	 * in their own cache line if possible
+	 */
+	dma_addr_t dma;			/* physical address of ring */
+	unsigned int size;		/* length of descriptor ring in bytes */
+	u32 txq_teid;			/* Added Tx queue TEID */
 	u16 rx_buf_len;
+#ifdef CONFIG_DCB
+	u8 dcb_tc;			/* Traffic class of ring */
+#endif /* CONFIG_DCB */
} ____cacheline_internodealigned_in_smp;

struct ice_ring_container {