Commit d42d118c authored by David S. Miller
Browse files

Merge branch 'cxgb4-improve-and-tune-TC-MQPRIO-offload'



Rahul Lakkireddy says:

====================
cxgb4: improve and tune TC-MQPRIO offload

Patch 1 improves the Tx path's credit request and recovery mechanism
when running under heavy load.

Patch 2 adds the ability to tune the burst buffer sizes of all traffic
classes to improve performance for MTU <= 1500, under heavy load.

Patch 3 adds support to track EOTIDs and dump software queue
contexts used by TC-MQPRIO offload.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 3430223d 5148e595
Loading
Loading
Loading
Loading
+16 −14
Original line number Diff line number Diff line
@@ -1125,19 +1125,20 @@ struct adapter {
 * programmed with various parameters.
 */
struct ch_sched_params {
	s8   type;                     /* packet or flow */
	u8   type;                     /* packet or flow */
	union {
		struct {
			s8   level;    /* scheduler hierarchy level */
			s8   mode;     /* per-class or per-flow */
			s8   rateunit; /* bit or packet rate */
			s8   ratemode; /* %port relative or kbps absolute */
			s8   channel;  /* scheduler channel [0..N] */
			s8   class;    /* scheduler class [0..N] */
			s32  minrate;  /* minimum rate */
			s32  maxrate;  /* maximum rate */
			s16  weight;   /* percent weight */
			s16  pktsize;  /* average packet size */
			u8   level;    /* scheduler hierarchy level */
			u8   mode;     /* per-class or per-flow */
			u8   rateunit; /* bit or packet rate */
			u8   ratemode; /* %port relative or kbps absolute */
			u8   channel;  /* scheduler channel [0..N] */
			u8   class;    /* scheduler class [0..N] */
			u32  minrate;  /* minimum rate */
			u32  maxrate;  /* maximum rate */
			u16  weight;   /* percent weight */
			u16  pktsize;  /* average packet size */
			u16  burstsize;  /* burst buffer size */
		} params;
	} u;
};
@@ -1952,9 +1953,10 @@ int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid,
		   enum ctxt_type ctype, u32 *data);
int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
		      enum ctxt_type ctype, u32 *data);
int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
		    int rateunit, int ratemode, int channel, int class,
		    int minrate, int maxrate, int weight, int pktsize);
int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode,
		    u8 rateunit, u8 ratemode, u8 channel, u8 class,
		    u32 minrate, u32 maxrate, u16 weight, u16 pktsize,
		    u16 burstsize);
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
void t4_idma_monitor_init(struct adapter *adapter,
			  struct sge_idma_monitor_state *idma);
+116 −28
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@
#include "cudbg_lib_common.h"
#include "cudbg_entity.h"
#include "cudbg_lib.h"
#include "cxgb4_tc_mqprio.h"

/* generic seq_file support for showing a table of size rows x width. */
static void *seq_tab_get_idx(struct seq_tab *tb, loff_t pos)
@@ -2657,32 +2658,19 @@ static int sge_qinfo_uld_ciq_entries(const struct adapter *adap, int uld)

static int sge_qinfo_show(struct seq_file *seq, void *v)
{
	int eth_entries, ctrl_entries, eo_entries = 0;
	int eth_entries, ctrl_entries, eohw_entries = 0, eosw_entries = 0;
	int uld_rxq_entries[CXGB4_ULD_MAX] = { 0 };
	int uld_ciq_entries[CXGB4_ULD_MAX] = { 0 };
	int uld_txq_entries[CXGB4_TX_MAX] = { 0 };
	const struct sge_uld_txq_info *utxq_info;
	const struct sge_uld_rxq_info *urxq_info;
	struct cxgb4_tc_port_mqprio *port_mqprio;
	struct adapter *adap = seq->private;
	int i, n, r = (uintptr_t)v - 1;
	int i, j, n, r = (uintptr_t)v - 1;
	struct sge *s = &adap->sge;

	eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
	ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
	if (adap->sge.eohw_txq)
		eo_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);

	mutex_lock(&uld_mutex);
	if (s->uld_txq_info)
		for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++)
			uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i);

	if (s->uld_rxq_info) {
		for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) {
			uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i);
			uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i);
		}
	}

	if (r)
		seq_putc(seq, '\n');
@@ -2759,11 +2747,21 @@ do { \
		RL("FLLow:", fl.low);
		RL("FLStarving:", fl.starving);

		goto unlock;
		goto out;
	}

	r -= eth_entries;
	if (r < eo_entries) {
	if (!adap->tc_mqprio)
		goto skip_mqprio;

	mutex_lock(&adap->tc_mqprio->mqprio_mutex);
	if (!refcount_read(&adap->tc_mqprio->refcnt)) {
		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
		goto skip_mqprio;
	}

	eohw_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
	if (r < eohw_entries) {
		int base_qset = r * 4;
		const struct sge_ofld_rxq *rx = &s->eohw_rxq[base_qset];
		const struct sge_eohw_txq *tx = &s->eohw_txq[base_qset];
@@ -2808,10 +2806,71 @@ do { \
		RL("FLLow:", fl.low);
		RL("FLStarving:", fl.starving);

		goto unlock;
		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
		goto out;
	}

	r -= eohw_entries;
	for (j = 0; j < adap->params.nports; j++) {
		int entries;
		u8 tc;

		port_mqprio = &adap->tc_mqprio->port_mqprio[j];
		entries = 0;
		for (tc = 0; tc < port_mqprio->mqprio.qopt.num_tc; tc++)
			entries += port_mqprio->mqprio.qopt.count[tc];

		if (!entries)
			continue;

		eosw_entries = DIV_ROUND_UP(entries, 4);
		if (r < eosw_entries) {
			const struct sge_eosw_txq *tx;

			n = min(4, entries - 4 * r);
			tx = &port_mqprio->eosw_txq[4 * r];

			S("QType:", "EOSW-TXQ");
			S("Interface:",
			  adap->port[j] ? adap->port[j]->name : "N/A");
			T("EOTID:", hwtid);
			T("HWQID:", hwqid);
			T("State:", state);
			T("Size:", ndesc);
			T("In-Use:", inuse);
			T("Credits:", cred);
			T("Compl:", ncompl);
			T("Last-Compl:", last_compl);
			T("PIDX:", pidx);
			T("Last-PIDX:", last_pidx);
			T("CIDX:", cidx);
			T("Last-CIDX:", last_cidx);
			T("FLOWC-IDX:", flowc_idx);

			mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
			goto out;
		}

		r -= eosw_entries;
	}
	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);

skip_mqprio:
	if (!is_uld(adap))
		goto skip_uld;

	mutex_lock(&uld_mutex);
	if (s->uld_txq_info)
		for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++)
			uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i);

	if (s->uld_rxq_info) {
		for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) {
			uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i);
			uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i);
		}
	}

	r -= eo_entries;
	if (r < uld_txq_entries[CXGB4_TX_OFLD]) {
		const struct sge_uld_txq *tx;

@@ -2994,6 +3053,9 @@ do { \
	}

	r -= uld_txq_entries[CXGB4_TX_CRYPTO];
	mutex_unlock(&uld_mutex);

skip_uld:
	if (r < ctrl_entries) {
		const struct sge_ctrl_txq *tx = &s->ctrlq[r * 4];

@@ -3008,7 +3070,7 @@ do { \
		TL("TxQFull:", q.stops);
		TL("TxQRestarts:", q.restarts);

		goto unlock;
		goto out;
	}

	r -= ctrl_entries;
@@ -3026,11 +3088,9 @@ do { \
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   s->counter_val[evtq->pktcnt_idx]);

		goto unlock;
		goto out;
	}

unlock:
	mutex_unlock(&uld_mutex);
#undef R
#undef RL
#undef T
@@ -3039,13 +3099,38 @@ unlock:
#undef R3
#undef T3
#undef S3
out:
	return 0;

unlock:
	mutex_unlock(&uld_mutex);
	return 0;
}

static int sge_queue_entries(const struct adapter *adap)
{
	int tot_uld_entries = 0;
	int i;
	int i, tot_uld_entries = 0, eohw_entries = 0, eosw_entries = 0;

	if (adap->tc_mqprio) {
		struct cxgb4_tc_port_mqprio *port_mqprio;
		u8 tc;

		mutex_lock(&adap->tc_mqprio->mqprio_mutex);
		if (adap->sge.eohw_txq)
			eohw_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);

		for (i = 0; i < adap->params.nports; i++) {
			u32 entries = 0;

			port_mqprio = &adap->tc_mqprio->port_mqprio[i];
			for (tc = 0; tc < port_mqprio->mqprio.qopt.num_tc; tc++)
				entries += port_mqprio->mqprio.qopt.count[tc];

			if (entries)
				eosw_entries += DIV_ROUND_UP(entries, 4);
		}
		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
	}

	if (!is_uld(adap))
		goto lld_only;
@@ -3062,8 +3147,7 @@ static int sge_queue_entries(const struct adapter *adap)

lld_only:
	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
	       (adap->sge.eohw_txq ? DIV_ROUND_UP(adap->sge.eoqsets, 4) : 0) +
	       tot_uld_entries +
	       eohw_entries + eosw_entries + tot_uld_entries +
	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
}

@@ -3244,6 +3328,10 @@ static int tid_info_show(struct seq_file *seq, void *v)
	if (t->nhpftids)
		seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base,
			   t->hpftid_base + t->nhpftids - 1);
	if (t->neotids)
		seq_printf(seq, "EOTID range: %u..%u, in use: %u\n",
			   t->eotid_base, t->eotid_base + t->neotids - 1,
			   atomic_read(&t->eotids_in_use));
	if (t->ntids)
		seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n",
			   t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A),
+2 −1
Original line number Diff line number Diff line
@@ -1579,6 +1579,7 @@ static int tid_init(struct tid_info *t)
	atomic_set(&t->tids_in_use, 0);
	atomic_set(&t->conns_in_use, 0);
	atomic_set(&t->hash_tids_in_use, 0);
	atomic_set(&t->eotids_in_use, 0);

	/* Setup the free list for atid_tab and clear the stid bitmap. */
	if (natids) {
@@ -3021,7 +3022,7 @@ static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf,
			      SCHED_CLASS_RATEUNIT_BITS,
			      SCHED_CLASS_RATEMODE_ABS,
			      pi->tx_chan, class_id, 0,
			      max_tx_rate * 1000, 0, pktsize);
			      max_tx_rate * 1000, 0, pktsize, 0);
	if (ret) {
		dev_err(adap->pdev_dev, "Err %d for Traffic Class config\n",
			ret);
+17 −0
Original line number Diff line number Diff line
@@ -342,6 +342,13 @@ static int cxgb4_mqprio_alloc_tc(struct net_device *dev,
		p.u.params.minrate = div_u64(mqprio->min_rate[i] * 8, 1000);
		p.u.params.maxrate = div_u64(mqprio->max_rate[i] * 8, 1000);

		/* Request larger burst buffer for smaller MTU, so
		 * that hardware can work on more data per burst
		 * cycle.
		 */
		if (dev->mtu <= ETH_DATA_LEN)
			p.u.params.burstsize = 8 * dev->mtu;

		e = cxgb4_sched_class_alloc(dev, &p);
		if (!e) {
			ret = -ENOMEM;
@@ -567,6 +574,7 @@ static void cxgb4_mqprio_disable_offload(struct net_device *dev)
int cxgb4_setup_tc_mqprio(struct net_device *dev,
			  struct tc_mqprio_qopt_offload *mqprio)
{
	struct adapter *adap = netdev2adap(dev);
	bool needs_bring_up = false;
	int ret;

@@ -574,6 +582,8 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev,
	if (ret)
		return ret;

	mutex_lock(&adap->tc_mqprio->mqprio_mutex);

	/* To configure tc params, the current allocated EOTIDs must
	 * be freed up. However, they can't be freed up if there's
	 * traffic running on the interface. So, ensure interface is
@@ -609,6 +619,7 @@ out:
	if (needs_bring_up)
		cxgb_open(dev);

	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
	return ret;
}

@@ -621,6 +632,7 @@ void cxgb4_mqprio_stop_offload(struct adapter *adap)
	if (!adap->tc_mqprio || !adap->tc_mqprio->port_mqprio)
		return;

	mutex_lock(&adap->tc_mqprio->mqprio_mutex);
	for_each_port(adap, i) {
		dev = adap->port[i];
		if (!dev)
@@ -632,6 +644,7 @@ void cxgb4_mqprio_stop_offload(struct adapter *adap)

		cxgb4_mqprio_disable_offload(dev);
	}
	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
}

int cxgb4_init_tc_mqprio(struct adapter *adap)
@@ -653,6 +666,8 @@ int cxgb4_init_tc_mqprio(struct adapter *adap)
		goto out_free_mqprio;
	}

	mutex_init(&tc_mqprio->mqprio_mutex);

	tc_mqprio->port_mqprio = tc_port_mqprio;
	for (i = 0; i < adap->params.nports; i++) {
		port_mqprio = &tc_mqprio->port_mqprio[i];
@@ -687,6 +702,7 @@ void cxgb4_cleanup_tc_mqprio(struct adapter *adap)
	u8 i;

	if (adap->tc_mqprio) {
		mutex_lock(&adap->tc_mqprio->mqprio_mutex);
		if (adap->tc_mqprio->port_mqprio) {
			for (i = 0; i < adap->params.nports; i++) {
				struct net_device *dev = adap->port[i];
@@ -698,6 +714,7 @@ void cxgb4_cleanup_tc_mqprio(struct adapter *adap)
			}
			kfree(adap->tc_mqprio->port_mqprio);
		}
		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
		kfree(adap->tc_mqprio);
	}
}
+1 −0
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ struct cxgb4_tc_port_mqprio {

/* Adapter-wide TC-MQPRIO offload state, reached through adap->tc_mqprio.
 *
 * port_mqprio is indexed by port number (0 .. adap->params.nports - 1).
 * mqprio_mutex is held by the MQPRIO setup, stop-offload and cleanup paths,
 * and by the SGE queue-info dump helpers while they walk port_mqprio.
 */
struct cxgb4_tc_mqprio {
	refcount_t refcnt; /* Refcount for adapter-wide resources */
	struct mutex mqprio_mutex; /* Lock for accessing MQPRIO info */
	struct cxgb4_tc_port_mqprio *port_mqprio; /* Per port MQPRIO info */
};

Loading