Commit 58275410 authored by David S. Miller
Browse files

Merge branch 'dpaa2-eth-Introduce-XDP-support'



Ioana Ciocoi Radulescu says:

====================
dpaa2-eth: Introduce XDP support

Add support for XDP programs. Only XDP_PASS, XDP_DROP and XDP_TX
actions are supported for now. Frame header changes are also
allowed.

v2: - count the XDP packets in the rx/tx interface stats
    - add message with the maximum supported MTU value for XDP
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 86d1d8b7 a4a7b762
Loading
Loading
Loading
Loading
+326 −23
Original line number Diff line number Diff line
@@ -13,7 +13,8 @@
#include <linux/iommu.h>
#include <linux/net_tstamp.h>
#include <linux/fsl/mc.h>

#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/sock.h>

#include "dpaa2-eth.h"
@@ -86,7 +87,7 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
		addr = dpaa2_sg_get_addr(&sgt[i]);
		sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
		dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
				 DMA_FROM_DEVICE);
				 DMA_BIDIRECTIONAL);

		skb_free_frag(sg_vaddr);
		if (dpaa2_sg_is_final(&sgt[i]))
@@ -144,7 +145,7 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
		sg_addr = dpaa2_sg_get_addr(sge);
		sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr);
		dma_unmap_single(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE,
				 DMA_FROM_DEVICE);
				 DMA_BIDIRECTIONAL);

		sg_length = dpaa2_sg_get_len(sge);

@@ -199,6 +200,141 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
	return skb;
}

/* Unmap and free a batch of Rx buffers identified by their DMA addresses.
 * Used for buffers that were taken out of the buffer pool, or that were
 * supposed to be released into it but could not be.
 *
 * @priv:      driver private data (supplies the device and IOMMU domain)
 * @buf_array: DMA addresses of the buffers to free
 * @count:     number of entries in @buf_array
 */
static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
{
	struct device *dev = priv->net_dev->dev.parent;
	int idx = 0;

	while (idx < count) {
		u64 iova = buf_array[idx++];
		void *va = dpaa2_iova_to_virt(priv->iommu_domain, iova);

		dma_unmap_single(dev, iova, DPAA2_ETH_RX_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		skb_free_frag(va);
	}
}

/* Hand an Rx buffer back to the buffer pool on behalf of XDP.
 *
 * Buffers are collected in the channel's drop_bufs array and only released
 * once DPAA2_ETH_BUFS_PER_CMD of them have accumulated. The release command
 * is retried for as long as it reports -EBUSY; on any other error the whole
 * batch is unmapped and freed, and the channel's buffer count is reduced
 * accordingly.
 *
 * @priv: driver private data
 * @ch:   channel whose batch array and DPIO are used
 * @addr: DMA address of the buffer being returned
 */
static void xdp_release_buf(struct dpaa2_eth_priv *priv,
			    struct dpaa2_eth_channel *ch,
			    dma_addr_t addr)
{
	struct dpaa2_eth_ch_xdp *xdp = &ch->xdp;
	int ret;

	/* Stash the buffer; nothing more to do until the batch is full */
	xdp->drop_bufs[xdp->drop_cnt] = addr;
	xdp->drop_cnt++;
	if (xdp->drop_cnt < DPAA2_ETH_BUFS_PER_CMD)
		return;

	for (;;) {
		ret = dpaa2_io_service_release(ch->dpio, priv->bpid,
					       xdp->drop_bufs,
					       xdp->drop_cnt);
		if (ret != -EBUSY)
			break;
		cpu_relax();
	}

	if (ret) {
		/* Release failed for good: drop the buffers ourselves */
		free_bufs(priv, xdp->drop_bufs, xdp->drop_cnt);
		ch->buf_count -= xdp->drop_cnt;
	}

	xdp->drop_cnt = 0;
}

/* Send a received frame back out as the result of an XDP_TX verdict.
 *
 * @priv:      driver private data
 * @fd:        frame descriptor to enqueue; its FRC and CTRL fields are updated
 * @buf_start: virtual address of the start of the frame buffer, where the
 *             egress annotation is written
 * @queue_id:  index into priv->fq selecting the queue used for the enqueue
 *
 * Returns the result of the last enqueue attempt. The enqueue is retried up
 * to DPAA2_ETH_ENQUEUE_RETRIES times while it reports -EBUSY.
 */
static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
		       void *buf_start, u16 queue_id)
{
	const u32 faead_ctrl = DPAA2_FAEAD_A4V | DPAA2_FAEAD_A2V |
			       DPAA2_FAEAD_EBDDV;
	struct dpaa2_eth_fq *txq = &priv->fq[queue_id];
	struct dpaa2_faead *annot;
	int attempts_left = DPAA2_ETH_ENQUEUE_RETRIES;
	int err;

	/* Flag the egress frame hardware annotation area as valid */
	dpaa2_fd_set_frc(fd, dpaa2_fd_get_frc(fd) | DPAA2_FD_FRC_FAEADV);
	dpaa2_fd_set_ctrl(fd, DPAA2_FD_CTRL_ASAL);

	/* Ask hardware to put the FD buffer straight back into the buffer
	 * pool when transmission completes, rather than sending us a Tx
	 * confirmation frame
	 */
	annot = dpaa2_get_faead(buf_start, false);
	annot->ctrl = cpu_to_le32(faead_ctrl);
	annot->conf_fqid = 0;

	while (attempts_left-- > 0) {
		err = dpaa2_io_service_enqueue_qd(txq->channel->dpio,
						  priv->tx_qdid, 0,
						  txq->tx_qdbin, fd);
		if (err != -EBUSY)
			break;
	}

	return err;
}

/* Run the XDP program attached to a channel, if any, on a single-buffer
 * Rx frame and act on its verdict.
 *
 * @priv:  driver private data
 * @ch:    channel the frame was received on; holds the program and the
 *         XDP counters
 * @rx_fq: Rx queue the frame arrived on; its flowid selects the queue used
 *         for XDP_TX
 * @fd:    frame descriptor; its offset and length are rewritten after the
 *         program has run
 * @vaddr: virtual address of the start of the frame buffer
 *
 * Returns the XDP action, or XDP_PASS when no program is attached. For any
 * result other than XDP_PASS the buffer has already been dealt with here:
 * it was either enqueued for transmission or passed to xdp_release_buf().
 */
static u32 run_xdp(struct dpaa2_eth_priv *priv,
		   struct dpaa2_eth_channel *ch,
		   struct dpaa2_eth_fq *rx_fq,
		   struct dpaa2_fd *fd, void *vaddr)
{
	dma_addr_t addr = dpaa2_fd_get_addr(fd);
	struct rtnl_link_stats64 *percpu_stats;
	struct bpf_prog *xdp_prog;
	struct xdp_buff xdp;
	u32 xdp_act = XDP_PASS;
	int err;

	percpu_stats = this_cpu_ptr(priv->percpu_stats);

	rcu_read_lock();

	xdp_prog = READ_ONCE(ch->xdp.prog);
	if (!xdp_prog)
		goto out;

	/* NOTE(review): only data, data_end, data_hard_start and data_meta
	 * are initialised below; any other xdp_buff member a program may
	 * read (e.g. an Rx queue info pointer) is left as stack garbage -
	 * confirm against the xdp_buff definition in use.
	 */
	xdp.data = vaddr + dpaa2_fd_get_offset(fd);
	xdp.data_end = xdp.data + dpaa2_fd_get_len(fd);
	xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
	xdp_set_data_meta_invalid(&xdp);

	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);

	/* xdp.data pointer may have changed */
	dpaa2_fd_set_offset(fd, xdp.data - vaddr);
	dpaa2_fd_set_len(fd, xdp.data_end - xdp.data);

	switch (xdp_act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		err = xdp_enqueue(priv, fd, vaddr, rx_fq->flowid);
		if (err) {
			/* Enqueue failed, so the buffer is still ours */
			xdp_release_buf(priv, ch, addr);
			percpu_stats->tx_errors++;
			ch->stats.xdp_tx_err++;
		} else {
			percpu_stats->tx_packets++;
			percpu_stats->tx_bytes += dpaa2_fd_get_len(fd);
			ch->stats.xdp_tx++;
		}
		break;
	default:
		/* Unknown verdicts are reported, traced and then dropped */
		bpf_warn_invalid_xdp_action(xdp_act);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
		/* fall through */
	case XDP_DROP:
		xdp_release_buf(priv, ch, addr);
		ch->stats.xdp_drop++;
		break;
	}

out:
	rcu_read_unlock();
	return xdp_act;
}

/* Main Rx frame processing routine */
static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
			 struct dpaa2_eth_channel *ch,
@@ -215,12 +351,14 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
	struct dpaa2_fas *fas;
	void *buf_data;
	u32 status = 0;
	u32 xdp_act;

	/* Tracing point */
	trace_dpaa2_rx_fd(priv->net_dev, fd);

	vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
	dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE, DMA_FROM_DEVICE);
	dma_sync_single_for_cpu(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
				DMA_BIDIRECTIONAL);

	fas = dpaa2_get_fas(vaddr, false);
	prefetch(fas);
@@ -231,8 +369,21 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
	percpu_extras = this_cpu_ptr(priv->percpu_extras);

	if (fd_format == dpaa2_fd_single) {
		xdp_act = run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr);
		if (xdp_act != XDP_PASS) {
			percpu_stats->rx_packets++;
			percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);
			return;
		}

		dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		skb = build_linear_skb(ch, fd, vaddr);
	} else if (fd_format == dpaa2_fd_sg) {
		WARN_ON(priv->xdp_prog);

		dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
				 DMA_BIDIRECTIONAL);
		skb = build_frag_skb(priv, ch, buf_data);
		skb_free_frag(vaddr);
		percpu_extras->rx_sg_frames++;
@@ -319,7 +470,6 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
		return 0;

	fq->stats.frames += cleaned;
	ch->stats.frames += cleaned;

	/* A dequeue operation only pulls frames from a single queue
	 * into the store. Return the frame queue as an out param.
@@ -743,23 +893,6 @@ static int set_tx_csum(struct dpaa2_eth_priv *priv, bool enable)
	return 0;
}

/* Free buffers acquired from the buffer pool or which were meant to
 * be released in the pool.
 *
 * For each of the @count DMA addresses in @buf_array, the buffer is
 * unmapped and then freed with skb_free_frag().
 */
static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
{
	struct device *dev = priv->net_dev->dev.parent;
	void *vaddr;
	int i;

	for (i = 0; i < count; i++) {
		/* Get the CPU virtual address that matches the DMA address */
		vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]);
		dma_unmap_single(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE,
				 DMA_FROM_DEVICE);
		skb_free_frag(vaddr);
	}
}

/* Perform a single release command to add buffers
 * to the specified buffer pool
 */
@@ -783,7 +916,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
		buf = PTR_ALIGN(buf, priv->rx_buf_align);

		addr = dma_map_single(dev, buf, DPAA2_ETH_RX_BUF_SIZE,
				      DMA_FROM_DEVICE);
				      DMA_BIDIRECTIONAL);
		if (unlikely(dma_mapping_error(dev, addr)))
			goto err_map;

@@ -1427,6 +1560,174 @@ static int dpaa2_eth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
	return -EINVAL;
}

/* Check whether @mtu can be used while an XDP program is attached.
 *
 * The L2 frame length derived from @mtu must fit in what remains of an Rx
 * buffer after subtracting the hardware annotation area, the Rx headroom
 * and XDP_PACKET_HEADROOM. When it does not, a warning stating the largest
 * usable MTU is logged.
 *
 * Returns true if @mtu is acceptable, false otherwise.
 */
static bool xdp_mtu_valid(struct dpaa2_eth_priv *priv, int mtu)
{
	int frame_len = DPAA2_ETH_L2_MAX_FRM(mtu);
	int max_linear = DPAA2_ETH_RX_BUF_SIZE - DPAA2_ETH_RX_HWA_SIZE -
			 dpaa2_eth_rx_head_room(priv) - XDP_PACKET_HEADROOM;

	if (frame_len <= max_linear)
		return true;

	netdev_warn(priv->net_dev, "Maximum MTU for XDP is %d\n",
		    max_linear - VLAN_ETH_HLEN);
	return false;
}

/* Program the maximum Rx frame length into the DPNI.
 *
 * With an XDP program attached the limit follows the MTU, so that Rx S/G
 * frames are avoided. Without one, every frame up to the hardware maximum
 * (DPAA2_ETH_MFL) is accepted.
 *
 * @priv:    driver private data
 * @mtu:     MTU the limit is derived from when @has_xdp is true
 * @has_xdp: whether an XDP program is (about to be) attached
 *
 * Returns 0 on success or the error from dpni_set_max_frame_length().
 */
static int set_rx_mfl(struct dpaa2_eth_priv *priv, int mtu, bool has_xdp)
{
	int max_frame_len = DPAA2_ETH_MFL;
	int ret;

	if (has_xdp)
		max_frame_len = DPAA2_ETH_L2_MAX_FRM(mtu);

	ret = dpni_set_max_frame_length(priv->mc_io, 0, priv->mc_token,
					max_frame_len);
	if (!ret)
		return 0;

	netdev_err(priv->net_dev, "dpni_set_max_frame_length failed\n");
	return ret;
}

/* ndo_change_mtu handler.
 *
 * Without an XDP program the new MTU is simply recorded. With one attached,
 * the MTU must first pass xdp_mtu_valid() and the Rx maximum frame length
 * is reprogrammed to match before the MTU is stored.
 *
 * Returns 0 on success, -EINVAL if the MTU is too large for XDP, or the
 * error from set_rx_mfl().
 */
static int dpaa2_eth_change_mtu(struct net_device *dev, int new_mtu)
{
	struct dpaa2_eth_priv *priv = netdev_priv(dev);

	if (priv->xdp_prog) {
		int ret;

		if (!xdp_mtu_valid(priv, new_mtu))
			return -EINVAL;

		ret = set_rx_mfl(priv, new_mtu, true);
		if (ret)
			return ret;
	}

	dev->mtu = new_mtu;
	return 0;
}

/* Reconfigure the Rx buffer layout's data headroom.
 *
 * The current layout is read back from the DPNI, the headroom is set to
 * the regular Rx headroom plus XDP_PACKET_HEADROOM when @has_xdp is true,
 * and the layout is written back with only the headroom option selected.
 *
 * Returns 0 on success or the error from the failing dpni_* call.
 */
static int update_rx_buffer_headroom(struct dpaa2_eth_priv *priv, bool has_xdp)
{
	struct dpni_buffer_layout layout = {0};
	int ret;

	ret = dpni_get_buffer_layout(priv->mc_io, 0, priv->mc_token,
				     DPNI_QUEUE_RX, &layout);
	if (ret) {
		netdev_err(priv->net_dev, "dpni_get_buffer_layout failed\n");
		return ret;
	}

	/* XDP programs may grow the header, so leave extra room for that */
	layout.data_head_room = dpaa2_eth_rx_head_room(priv);
	if (has_xdp)
		layout.data_head_room += XDP_PACKET_HEADROOM;
	layout.options = DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM;

	ret = dpni_set_buffer_layout(priv->mc_io, 0, priv->mc_token,
				     DPNI_QUEUE_RX, &layout);
	if (ret)
		netdev_err(priv->net_dev, "dpni_set_buffer_layout failed\n");

	return ret;
}

/* Attach, replace or detach (when @prog is NULL) the XDP program.
 *
 * A new program is only accepted if the current MTU passes xdp_mtu_valid().
 * One extra reference per channel is taken on it, since each channel keeps
 * its own pointer in addition to priv->xdp_prog. A running interface is
 * stopped for the duration of the change and reopened afterwards.
 *
 * Returns 0 on success or a negative error code.
 */
static int setup_xdp(struct net_device *dev, struct bpf_prog *prog)
{
	struct dpaa2_eth_priv *priv = netdev_priv(dev);
	struct bpf_prog *prev;
	bool was_up, mode_change;
	int idx, ret;

	if (prog) {
		if (!xdp_mtu_valid(priv, dev->mtu))
			return -EINVAL;

		prog = bpf_prog_add(prog, priv->num_channels);
		if (IS_ERR(prog))
			return PTR_ERR(prog);
	}

	was_up = netif_running(dev);
	mode_change = (!priv->xdp_prog != !prog);

	if (was_up)
		dpaa2_eth_stop(dev);

	/* In xdp mode the maximum Rx frame size is tied to the MTU, and
	 * moving between xdp and non-xdp modes requires a different Rx
	 * buffer layout. dpaa2_eth_stop drained the buffer pool, so no
	 * buffers with the old layout remain in use past this point.
	 */
	if (mode_change) {
		ret = set_rx_mfl(priv, dev->mtu, !!prog);
		if (!ret)
			ret = update_rx_buffer_headroom(priv, !!prog);
		if (ret)
			goto undo;
	}

	/* Swap in the new program everywhere, dropping the old references */
	prev = xchg(&priv->xdp_prog, prog);
	if (prev)
		bpf_prog_put(prev);

	for (idx = 0; idx < priv->num_channels; idx++) {
		prev = xchg(&priv->channel[idx]->xdp.prog, prog);
		if (prev)
			bpf_prog_put(prev);
	}

	/* NOTE(review): if reopening fails, an error is returned although
	 * the new program is already installed - confirm the caller's
	 * reference handling copes with that.
	 */
	return was_up ? dpaa2_eth_open(dev) : 0;

undo:
	if (prog)
		bpf_prog_sub(prog, priv->num_channels);
	if (was_up)
		dpaa2_eth_open(dev);

	return ret;
}

/* ndo_bpf handler: dispatch an XDP command from the stack.
 *
 * XDP_SETUP_PROG is forwarded to setup_xdp(). XDP_QUERY_PROG reports the id
 * of the attached program, or 0 if there is none. Any other command is
 * rejected with -EINVAL.
 */
static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct dpaa2_eth_priv *priv = netdev_priv(dev);

	if (xdp->command == XDP_SETUP_PROG)
		return setup_xdp(dev, xdp->prog);

	if (xdp->command != XDP_QUERY_PROG)
		return -EINVAL;

	if (priv->xdp_prog)
		xdp->prog_id = priv->xdp_prog->aux->id;
	else
		xdp->prog_id = 0;

	return 0;
}

static const struct net_device_ops dpaa2_eth_ops = {
	.ndo_open = dpaa2_eth_open,
	.ndo_start_xmit = dpaa2_eth_tx,
@@ -1436,6 +1737,8 @@ static const struct net_device_ops dpaa2_eth_ops = {
	.ndo_set_rx_mode = dpaa2_eth_set_rx_mode,
	.ndo_set_features = dpaa2_eth_set_features,
	.ndo_do_ioctl = dpaa2_eth_ioctl,
	.ndo_change_mtu = dpaa2_eth_change_mtu,
	.ndo_bpf = dpaa2_eth_xdp,
};

static void cdan_cb(struct dpaa2_io_notification_ctx *ctx)
+16 −4
Original line number Diff line number Diff line
@@ -139,7 +139,9 @@ struct dpaa2_faead {
};

#define DPAA2_FAEAD_A2V			0x20000000
#define DPAA2_FAEAD_A4V			0x08000000
#define DPAA2_FAEAD_UPDV		0x00001000
#define DPAA2_FAEAD_EBDDV		0x00002000
#define DPAA2_FAEAD_UPD			0x00000010

/* Accessors for the hardware annotation fields that we use */
@@ -243,12 +245,14 @@ struct dpaa2_eth_fq_stats {
struct dpaa2_eth_ch_stats {
	/* Volatile dequeues retried due to portal busy */
	__u64 dequeue_portal_busy;
	/* Number of CDANs; useful to estimate avg NAPI len */
	__u64 cdan;
	/* Number of frames received on queues from this channel */
	__u64 frames;
	/* Pull errors */
	__u64 pull_err;
	/* Number of CDANs; useful to estimate avg NAPI len */
	__u64 cdan;
	/* XDP counters */
	__u64 xdp_drop;
	__u64 xdp_tx;
	__u64 xdp_tx_err;
};

/* Maximum number of queues associated with a DPNI */
@@ -283,6 +287,12 @@ struct dpaa2_eth_fq {
	struct dpaa2_eth_fq_stats stats;
};

/* Per-channel XDP state */
struct dpaa2_eth_ch_xdp {
	/* XDP program run on frames received on this channel; NULL if none */
	struct bpf_prog *prog;
	/* Addresses of buffers waiting to be released to the pool as a batch */
	u64 drop_bufs[DPAA2_ETH_BUFS_PER_CMD];
	/* Number of valid entries in drop_bufs */
	int drop_cnt;
};

struct dpaa2_eth_channel {
	struct dpaa2_io_notification_ctx nctx;
	struct fsl_mc_device *dpcon;
@@ -294,6 +304,7 @@ struct dpaa2_eth_channel {
	struct dpaa2_eth_priv *priv;
	int buf_count;
	struct dpaa2_eth_ch_stats stats;
	struct dpaa2_eth_ch_xdp xdp;
};

struct dpaa2_eth_dist_fields {
@@ -353,6 +364,7 @@ struct dpaa2_eth_priv {
	u64 rx_hash_fields;
	struct dpaa2_eth_cls_rule *cls_rules;
	u8 rx_cls_enabled;
	struct bpf_prog *xdp_prog;
};

#define DPAA2_RXH_SUPPORTED	(RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \
+8 −11
Original line number Diff line number Diff line
@@ -45,6 +45,9 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
	"[drv] dequeue portal busy",
	"[drv] channel pull errors",
	"[drv] cdan",
	"[drv] xdp drop",
	"[drv] xdp tx",
	"[drv] xdp tx errors",
};

#define DPAA2_ETH_NUM_EXTRA_STATS	ARRAY_SIZE(dpaa2_ethtool_extras)
@@ -174,8 +177,6 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
	int j, k, err;
	int num_cnt;
	union dpni_statistics dpni_stats;
	u64 cdan = 0;
	u64 portal_busy = 0, pull_err = 0;
	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
	struct dpaa2_eth_drv_stats *extras;
	struct dpaa2_eth_ch_stats *ch_stats;
@@ -212,16 +213,12 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
	}
	i += j;

	for (j = 0; j < priv->num_channels; j++) {
		ch_stats = &priv->channel[j]->stats;
		cdan += ch_stats->cdan;
		portal_busy += ch_stats->dequeue_portal_busy;
		pull_err += ch_stats->pull_err;
	/* Per-channel stats */
	for (k = 0; k < priv->num_channels; k++) {
		ch_stats = &priv->channel[k]->stats;
		for (j = 0; j < sizeof(*ch_stats) / sizeof(__u64); j++)
			*((__u64 *)data + i + j) += *((__u64 *)ch_stats + j);
	}

	*(data + i++) = portal_busy;
	*(data + i++) = pull_err;
	*(data + i++) = cdan;
}

static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,