Commit 69625ea7 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'cxgb4-add-support-for-TC-MQPRIO-Qdisc-Offload'



Rahul Lakkireddy says:

====================
cxgb4: add support for TC-MQPRIO Qdisc Offload

This series of patches add support for offloading TC-MQPRIO Qdisc
to Chelsio T5/T6 NICs. Offloading QoS traffic shaping and pacing
requires using Ethernet Offload (ETHOFLD) resources available on
Chelsio NICs. The ETHOFLD resources are configured by firmware
and taken from the resource pool shared with other Chelsio Upper
Layer Drivers. Traffic flowing through ETHOFLD region requires a
software netdev Tx queue (EOSW_TXQ) exposed to networking stack,
and an underlying hardware Tx queue (EOHW_TXQ) used for sending
packets through hardware.

ETHOFLD region is addressed using EOTIDs, which are per-connection
resource. Hence, EOTIDs are capable of storing only a very small
number of packets in flight. To allow more connections to share
the the QoS rate limiting configuration, multiple EOTIDs must be
allocated to reduce packet drops. EOTIDs are 1-to-1 mapped with
software EOSW_TXQ. Several software EOSW_TXQs can post packets to
a single hardware EOHW_TXQ.

The series is broken down as follows:

Patch 1 queries firmware for maximum available traffic classes,
as well as, start and maximum available indices (EOTID) into ETHOFLD
region, supported by the underlying device.

Patch 2 reworks queue configuration and simplifies MSI-X allocation
logic in preparation for ETHOFLD queues support.

Patch 3 adds skeleton for validating and configuring TC-MQPRIO Qdisc
offload. Also, adds support for software EOSW_TXQs and exposes them
to network stack. Updates Tx queue selection to use fallback NIC Tx
path for unsupported traffic that can't go through ETHOFLD queues.

Patch 4 adds support for managing hardware queues to rate limit
traffic flowing through them. The queues are allocated/removed based
on enabling/disabling TC-MQPRIO Qdisc offload, respectively.

Patch 5 adds Tx path for traffic flowing through software EOSW_TXQ
and EOHW_TXQ. Also, adds Rx path to handle Tx completions.

Patch 6 updates exisiting SCHED API to configure FLOWC based QoS
offload. In the existing QUEUE based rate limiting, multiple queues
sharing a traffic class get the aggreagated max rate limit value.
On the other hand, in FLOWC based rate limiting, multiple queues
sharing a traffic class get their own individual max rate limit
value. For example, if 2 queues are bound to class 0, which is rate
limited to 1 Gbps, then in QUEUE based rate limiting, both the
queues get the aggregate max output of 1 Gbps only. In FLOWC based
rate limiting, each queue gets its own output of max 1 Gbps each;
i.e. 2 queues * 1 Gbps rate limit = 2 Gbps max output.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 1c8dd9cb 0e395b3c
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
	      cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
	      cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o cxgb4_mps.o \
	      cudbg_common.o cudbg_lib.o cudbg_zlib.o
	      cudbg_common.o cudbg_lib.o cudbg_zlib.o cxgb4_tc_mqprio.o
cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
+3 −0
Original line number Diff line number Diff line
@@ -325,6 +325,9 @@ enum cudbg_qdesc_qtype {
	CUDBG_QTYPE_CRYPTO_FLQ,
	CUDBG_QTYPE_TLS_RXQ,
	CUDBG_QTYPE_TLS_FLQ,
	CUDBG_QTYPE_ETHOFLD_TXQ,
	CUDBG_QTYPE_ETHOFLD_RXQ,
	CUDBG_QTYPE_ETHOFLD_FLQ,
	CUDBG_QTYPE_MAX,
};

+21 −0
Original line number Diff line number Diff line
@@ -2930,6 +2930,10 @@ void cudbg_fill_qdesc_num_and_size(const struct adapter *padap,
	tot_size += CXGB4_ULD_MAX * MAX_ULD_QSETS * SGE_MAX_IQ_SIZE *
		    MAX_RXQ_DESC_SIZE;

	/* ETHOFLD TXQ, RXQ, and FLQ */
	tot_entries += MAX_OFLD_QSETS * 3;
	tot_size += MAX_OFLD_QSETS * MAX_TXQ_ENTRIES * MAX_TXQ_DESC_SIZE;

	tot_size += sizeof(struct cudbg_ver_hdr) +
		    sizeof(struct cudbg_qdesc_info) +
		    sizeof(struct cudbg_qdesc_entry) * tot_entries;
@@ -3087,6 +3091,23 @@ int cudbg_collect_qdesc(struct cudbg_init *pdbg_init,
		}
	}

	/* ETHOFLD TXQ */
	if (s->eohw_txq)
		for (i = 0; i < s->eoqsets; i++)
			QDESC_GET_TXQ(&s->eohw_txq[i].q,
				      CUDBG_QTYPE_ETHOFLD_TXQ, out);

	/* ETHOFLD RXQ and FLQ */
	if (s->eohw_rxq) {
		for (i = 0; i < s->eoqsets; i++)
			QDESC_GET_RXQ(&s->eohw_rxq[i].rspq,
				      CUDBG_QTYPE_ETHOFLD_RXQ, out);

		for (i = 0; i < s->eoqsets; i++)
			QDESC_GET_FLQ(&s->eohw_rxq[i].fl,
				      CUDBG_QTYPE_ETHOFLD_FLQ, out);
	}

out_unlock:
	mutex_unlock(&uld_mutex);

+97 −12
Original line number Diff line number Diff line
@@ -392,6 +392,7 @@ struct adapter_params {
	struct arch_specific_params arch;  /* chip specific params */
	unsigned char offload;
	unsigned char crypto;		/* HW capability for crypto */
	unsigned char ethofld;		/* QoS support */

	unsigned char bypass;
	unsigned char hash_filter;
@@ -711,6 +712,7 @@ struct sge_eth_rxq { /* SW Ethernet Rx queue */
	struct sge_rspq rspq;
	struct sge_fl fl;
	struct sge_eth_stats stats;
	struct msix_info *msix;
} ____cacheline_aligned_in_smp;

struct sge_ofld_stats {             /* offload queue statistics */
@@ -724,6 +726,7 @@ struct sge_ofld_rxq { /* SW offload Rx queue */
	struct sge_rspq rspq;
	struct sge_fl fl;
	struct sge_ofld_stats stats;
	struct msix_info *msix;
} ____cacheline_aligned_in_smp;

struct tx_desc {
@@ -788,7 +791,6 @@ struct sge_ctrl_txq { /* state for an SGE control Tx queue */
struct sge_uld_rxq_info {
	char name[IFNAMSIZ];	/* name of ULD driver */
	struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */
	u16 *msix_tbl;		/* msix_tbl for uld */
	u16 *rspq_id;		/* response queue id's of rxq */
	u16 nrxq;		/* # of ingress uld queues */
	u16 nciq;		/* # of completion queues */
@@ -801,6 +803,55 @@ struct sge_uld_txq_info {
	u16 ntxq;		/* # of egress uld queues */
};

enum sge_eosw_state {
	CXGB4_EO_STATE_CLOSED = 0, /* Not ready to accept traffic */
	CXGB4_EO_STATE_FLOWC_OPEN_SEND, /* Send FLOWC open request */
	CXGB4_EO_STATE_FLOWC_OPEN_REPLY, /* Waiting for FLOWC open reply */
	CXGB4_EO_STATE_ACTIVE, /* Ready to accept traffic */
	CXGB4_EO_STATE_FLOWC_CLOSE_SEND, /* Send FLOWC close request */
	CXGB4_EO_STATE_FLOWC_CLOSE_REPLY, /* Waiting for FLOWC close reply */
};

struct sge_eosw_desc {
	struct sk_buff *skb; /* SKB to free after getting completion */
	dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* DMA mapped addresses */
};

struct sge_eosw_txq {
	spinlock_t lock; /* Per queue lock to synchronize completions */
	enum sge_eosw_state state; /* Current ETHOFLD State */
	struct sge_eosw_desc *desc; /* Descriptor ring to hold packets */
	u32 ndesc; /* Number of descriptors */
	u32 pidx; /* Current Producer Index */
	u32 last_pidx; /* Last successfully transmitted Producer Index */
	u32 cidx; /* Current Consumer Index */
	u32 last_cidx; /* Last successfully reclaimed Consumer Index */
	u32 flowc_idx; /* Descriptor containing a FLOWC request */
	u32 inuse; /* Number of packets held in ring */

	u32 cred; /* Current available credits */
	u32 ncompl; /* # of completions posted */
	u32 last_compl; /* # of credits consumed since last completion req */

	u32 eotid; /* Index into EOTID table in software */
	u32 hwtid; /* Hardware EOTID index */

	u32 hwqid; /* Underlying hardware queue index */
	struct net_device *netdev; /* Pointer to netdevice */
	struct tasklet_struct qresume_tsk; /* Restarts the queue */
	struct completion completion; /* completion for FLOWC rendezvous */
};

struct sge_eohw_txq {
	spinlock_t lock; /* Per queue lock */
	struct sge_txq q; /* HW Txq */
	struct adapter *adap; /* Backpointer to adapter */
	unsigned long tso; /* # of TSO requests */
	unsigned long tx_cso; /* # of Tx checksum offloads */
	unsigned long vlan_ins; /* # of Tx VLAN insertions */
	unsigned long mapping_err; /* # of I/O MMU packet mapping errors */
};

struct sge {
	struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
	struct sge_eth_txq ptptxq;
@@ -814,11 +865,16 @@ struct sge {
	struct sge_rspq intrq ____cacheline_aligned_in_smp;
	spinlock_t intrq_lock;

	struct sge_eohw_txq *eohw_txq;
	struct sge_ofld_rxq *eohw_rxq;

	u16 max_ethqsets;           /* # of available Ethernet queue sets */
	u16 ethqsets;               /* # of active Ethernet queue sets */
	u16 ethtxq_rover;           /* Tx queue to clean up next */
	u16 ofldqsets;              /* # of active ofld queue sets */
	u16 nqs_per_uld;	    /* # of Rx queues per ULD */
	u16 eoqsets;                /* # of ETHOFLD queues */

	u16 timer_val[SGE_NTIMERS];
	u8 counter_val[SGE_NCOUNTERS];
	u16 dbqtimer_tick;
@@ -841,6 +897,9 @@ struct sge {
	unsigned long *blocked_fl;
	struct timer_list rx_timer; /* refills starving FLs */
	struct timer_list tx_timer; /* checks Tx queues */

	int fwevtq_msix_idx; /* Index to firmware event queue MSI-X info */
	int nd_msix_idx; /* Index to non-data interrupts MSI-X info */
};

#define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
@@ -870,13 +929,13 @@ struct hash_mac_addr {
	unsigned int iface_mac;
};

struct uld_msix_bmap {
struct msix_bmap {
	unsigned long *msix_bmap;
	unsigned int mapsize;
	spinlock_t lock; /* lock for acquiring bitmap */
};

struct uld_msix_info {
struct msix_info {
	unsigned short vec;
	char desc[IFNAMSIZ + 10];
	unsigned int idx;
@@ -945,14 +1004,9 @@ struct adapter {
	struct cxgb4_virt_res vres;
	unsigned int swintr;

	struct msix_info {
		unsigned short vec;
		char desc[IFNAMSIZ + 10];
		cpumask_var_t aff_mask;
	} msix_info[MAX_INGQ + 1];
	struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
	struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
	int msi_idx;
	/* MSI-X Info for NIC and OFLD queues */
	struct msix_info *msix_info;
	struct msix_bmap msix_bmap;

	struct doorbell_stats db_stats;
	struct sge sge;
@@ -1044,6 +1098,9 @@ struct adapter {
#if IS_ENABLED(CONFIG_THERMAL)
	struct ch_thermal ch_thermal;
#endif

	/* TC MQPRIO offload */
	struct cxgb4_tc_mqprio *tc_mqprio;
};

/* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1077,6 +1134,7 @@ enum {

enum {
	SCHED_CLASS_MODE_CLASS = 0,     /* per-class scheduling */
	SCHED_CLASS_MODE_FLOW,          /* per-flow scheduling */
};

enum {
@@ -1100,6 +1158,14 @@ struct ch_sched_queue {
	s8   class;    /* class index */
};

/* Support for "sched_flowc" command to allow one or more FLOWC
 * to be bound to a TX Scheduling Class.
 */
struct ch_sched_flowc {
	s32 tid;   /* TID to bind */
	s8  class; /* class index */
};

/* Defined bit width of user definable filter tuples
 */
#define ETHTYPE_BITWIDTH 16
@@ -1293,6 +1359,11 @@ static inline int is_uld(const struct adapter *adap)
	return (adap->params.offload || adap->params.crypto);
}

static inline int is_ethofld(const struct adapter *adap)
{
	return adap->params.ethofld;
}

static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
{
	return readl(adap->regs + reg_addr);
@@ -1426,6 +1497,9 @@ int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
			 struct net_device *dev, unsigned int iqid,
			 unsigned int uld_type);
int t4_sge_alloc_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq,
			     struct net_device *dev, u32 iqid);
void t4_sge_free_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq);
irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
int t4_sge_init(struct adapter *adap);
void t4_sge_start(struct adapter *adap);
@@ -1890,6 +1964,12 @@ int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port,
void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
void free_tx_desc(struct adapter *adap, struct sge_txq *q,
		  unsigned int n, bool unmap);
void cxgb4_eosw_txq_free_desc(struct adapter *adap, struct sge_eosw_txq *txq,
			      u32 ndesc);
int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc);
void cxgb4_ethofld_restart(unsigned long data);
int cxgb4_ethofld_rx_handler(struct sge_rspq *q, const __be64 *rsp,
			     const struct pkt_gl *si);
void free_txq(struct adapter *adap, struct sge_txq *q);
void cxgb4_reclaim_completed_tx(struct adapter *adap,
				struct sge_txq *q, bool unmap);
@@ -1948,5 +2028,10 @@ int cxgb4_alloc_raw_mac_filt(struct adapter *adap,
int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid,
			  int *tcam_idx, const u8 *addr,
			  bool persistent, u8 *smt_idx);

int cxgb4_get_msix_idx_from_bmap(struct adapter *adap);
void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, u32 msix_idx);
int cxgb_open(struct net_device *dev);
int cxgb_close(struct net_device *dev);
void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q);
void cxgb4_quiesce_rx(struct sge_rspq *q);
#endif /* __CXGB4_H__ */
+52 −1
Original line number Diff line number Diff line
@@ -2658,6 +2658,7 @@ static int sge_qinfo_uld_ciq_entries(const struct adapter *adap, int uld)

static int sge_qinfo_show(struct seq_file *seq, void *v)
{
	int eth_entries, ctrl_entries, eo_entries = 0;
	int uld_rxq_entries[CXGB4_ULD_MAX] = { 0 };
	int uld_ciq_entries[CXGB4_ULD_MAX] = { 0 };
	int uld_txq_entries[CXGB4_TX_MAX] = { 0 };
@@ -2665,11 +2666,12 @@ static int sge_qinfo_show(struct seq_file *seq, void *v)
	const struct sge_uld_rxq_info *urxq_info;
	struct adapter *adap = seq->private;
	int i, n, r = (uintptr_t)v - 1;
	int eth_entries, ctrl_entries;
	struct sge *s = &adap->sge;

	eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
	ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
	if (adap->sge.eohw_txq)
		eo_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);

	mutex_lock(&uld_mutex);
	if (s->uld_txq_info)
@@ -2761,6 +2763,54 @@ do { \
	}

	r -= eth_entries;
	if (r < eo_entries) {
		int base_qset = r * 4;
		const struct sge_ofld_rxq *rx = &s->eohw_rxq[base_qset];
		const struct sge_eohw_txq *tx = &s->eohw_txq[base_qset];

		n = min(4, s->eoqsets - 4 * r);

		S("QType:", "ETHOFLD");
		S("Interface:",
		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
		T("TxQ ID:", q.cntxt_id);
		T("TxQ size:", q.size);
		T("TxQ inuse:", q.in_use);
		T("TxQ CIDX:", q.cidx);
		T("TxQ PIDX:", q.pidx);
		R("RspQ ID:", rspq.abs_id);
		R("RspQ size:", rspq.size);
		R("RspQE size:", rspq.iqe_len);
		R("RspQ CIDX:", rspq.cidx);
		R("RspQ Gen:", rspq.gen);
		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
		S3("u", "Intr pktcnt:", s->counter_val[rx[i].rspq.pktcnt_idx]);
		R("FL ID:", fl.cntxt_id);
		S3("u", "FL size:", rx->fl.size ? rx->fl.size - 8 : 0);
		R("FL pend:", fl.pend_cred);
		R("FL avail:", fl.avail);
		R("FL PIDX:", fl.pidx);
		R("FL CIDX:", fl.cidx);
		RL("RxPackets:", stats.pkts);
		RL("RxImm:", stats.imm);
		RL("RxAN", stats.an);
		RL("RxNoMem", stats.nomem);
		TL("TSO:", tso);
		TL("TxCSO:", tx_cso);
		TL("VLANins:", vlan_ins);
		TL("TxQFull:", q.stops);
		TL("TxQRestarts:", q.restarts);
		TL("TxMapErr:", mapping_err);
		RL("FLAllocErr:", fl.alloc_failed);
		RL("FLLrgAlcErr:", fl.large_alloc_failed);
		RL("FLMapErr:", fl.mapping_err);
		RL("FLLow:", fl.low);
		RL("FLStarving:", fl.starving);

		goto unlock;
	}

	r -= eo_entries;
	if (r < uld_txq_entries[CXGB4_TX_OFLD]) {
		const struct sge_uld_txq *tx;

@@ -3007,6 +3057,7 @@ static int sge_queue_entries(const struct adapter *adap)
	mutex_unlock(&uld_mutex);

	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
	       (adap->sge.eohw_txq ? DIV_ROUND_UP(adap->sge.eoqsets, 4) : 0) +
	       tot_uld_entries +
	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
}
Loading