Commit 6991abcb authored by Kaike Wan, committed by Jason Gunthorpe

IB/hfi1: Add functions to receive accelerated ipoib packets

The IPoIB netdev will share receive contexts with the existing VNIC
netdev. To achieve this, a dummy netdev is allocated in hfi1_devdata to
own the receive contexts, and the IPoIB and VNIC netdevs are layered on
top of it. Each receive context is associated with a single NAPI object.

This patch adds the functions that receive incoming packets for
accelerated IPoIB.
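
As an illustration of that layering, a minimal sketch (the poll stub and
the *_example function names are placeholders, not part of this patch;
dd->dummy_netdev, rcd->napi and struct hfi1_netdev_rxq are the pieces
introduced by this series):

	static int hfi1_netdev_rx_poll_example(struct napi_struct *napi, int budget)
	{
		/* Real packet processing is out of scope for this sketch. */
		napi_complete_done(napi, 0);
		return 0;
	}

	static void hfi1_netdev_add_napi_example(struct hfi1_devdata *dd,
						 struct hfi1_ctxtdata *rcd,
						 struct hfi1_netdev_rxq *rxq)
	{
		/*
		 * One NAPI instance per shared receive context, anchored on
		 * the dummy netdev owned by hfi1_devdata; the IPoIB and VNIC
		 * netdevs sit on top of the shared contexts.
		 */
		netif_napi_add(dd->dummy_netdev, &rxq->napi,
			       hfi1_netdev_rx_poll_example, NAPI_POLL_WEIGHT);
		rcd->napi = &rxq->napi; /* consumed by hfi1_ipoib_ib_rcv() */
	}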

Link: https://lore.kernel.org/r/20200511160631.173205.54184.stgit@awfm-01.aw.intel.com
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Sadanand Warrier <sadanand.warrier@intel.com>
Signed-off-by: Grzegorz Andrejczuk <grzegorz.andrejczuk@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 89dcaa36
drivers/infiniband/hw/hfi1/Makefile  +2 −0
@@ -23,10 +23,12 @@ hfi1-y := \
	intr.o \
	iowait.o \
	ipoib_main.o \
	ipoib_rx.o \
	ipoib_tx.o \
	mad.o \
	mmu_rb.o \
	msix.o \
	netdev_rx.o \
	opfn.o \
	pcie.o \
	pio.o \
drivers/infiniband/hw/hfi1/driver.c  +91 −1
/*
 * Copyright(c) 2015-2018 Intel Corporation.
 * Copyright(c) 2015-2020 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
@@ -54,6 +54,7 @@
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>
#include <linux/etherdevice.h>

#include "hfi.h"
#include "trace.h"
@@ -63,6 +64,9 @@
#include "vnic.h"
#include "fault.h"

#include "ipoib.h"
#include "netdev.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

@@ -1550,6 +1554,81 @@ void handle_eflags(struct hfi1_packet *packet)
		show_eflags_errs(packet);
}

static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp;
	struct net_device *netdev;
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct napi_struct *napi = rcd->napi;
	struct sk_buff *skb;
	struct hfi1_netdev_rxq *rxq = container_of(napi,
			struct hfi1_netdev_rxq, napi);
	u32 extra_bytes;
	u32 tlen, qpnum;
	bool do_work, do_cnp;
	struct hfi1_ipoib_dev_priv *priv;

	trace_hfi1_rcvhdr(packet);

	hfi1_setup_ib_header(packet);

	packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
	packet->grh = NULL;

	if (unlikely(rhf_err_flags(packet->rhf))) {
		handle_eflags(packet);
		return;
	}

	qpnum = ib_bth_get_qpn(packet->ohdr);
	netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
	if (!netdev)
		goto drop_no_nd;

	trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));

	/* handle congestion notifications */
	do_work = hfi1_may_ecn(packet);
	if (unlikely(do_work)) {
		do_cnp = (packet->opcode != IB_OPCODE_CNP);
		(void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
						 packet, do_cnp);
	}

	/*
	 * The packet is split after the last byte of the DETH, so strip
	 * the padding and the ICRC here.  tlen is the whole packet length,
	 * so the header size must be subtracted as well.
	 */
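	/*
	 * Hypothetical example (illustration only, not from this patch):
	 * for tlen = 100 bytes with hlen = 28 (LRH + BTH + DETH), a BTH
	 * pad count of 2 and the 4-byte ICRC, the payload handed to
	 * hfi1_ipoib_prepare_skb() below is 100 - (28 + 2 + 4) = 66 bytes.
	 */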
	tlen = packet->tlen;
	extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
			packet->hlen;
	if (unlikely(tlen < extra_bytes))
		goto drop;

	tlen -= extra_bytes;

	skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
	if (unlikely(!skb))
		goto drop;

	priv = hfi1_ipoib_priv(netdev);
	hfi1_ipoib_update_rx_netstats(priv, 1, skb->len);

	skb->dev = netdev;
	skb->pkt_type = PACKET_HOST;
	netif_receive_skb(skb);

	return;

drop:
	++netdev->stats.rx_dropped;
drop_no_nd:
	ibp = rcd_to_iport(packet->rcd);
	++ibp->rvp.n_pkt_drops;
}

/*
 * The following functions are called by the interrupt handler. They are type
 * specific handlers for each packet type.
@@ -1757,3 +1836,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
	[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
	[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};

const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
	[RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
	[RHF_RCV_TYPE_EAGER] = process_receive_invalid,
	[RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
	[RHF_RCV_TYPE_ERROR] = process_receive_error,
	[RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
	[RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
	[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
	[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};
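
The receive type encoded in a packet's receive header flags (RHF) indexes
this table. A rough dispatch sketch, assuming the context's handler map
has already been pointed at one of these tables (shown purely as an
illustration, not code from this patch):

	packet->rcd->rhf_rcv_function_map[rhf_rcv_type(packet->rhf)](packet);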
drivers/infiniband/hw/hfi1/hfi.h  +4 −1
@@ -233,6 +233,8 @@ struct hfi1_ctxtdata {
	intr_handler fast_handler;
	/** slow handler */
	intr_handler slow_handler;
	/* napi pointer associated with netdev */
	struct napi_struct *napi;
	/* verbs rx_stats per rcd */
	struct hfi1_opcode_stats_perctx *opstats;
	/* clear interrupt mask */
@@ -985,7 +987,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
			      struct hfi1_pkt_state *ps,
			      struct rvt_swqe *wqe);
extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];

extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];

/* return values for the RHF receive functions */
#define RHF_RCV_CONTINUE  0	/* keep going */
@@ -1417,6 +1419,7 @@ struct hfi1_devdata {
	struct hfi1_vnic_data vnic;
	/* Lock to protect IRQ SRC register access */
	spinlock_t irq_src_lock;
	struct net_device *dummy_netdev;

	/* Keeps track of IPoIB RSM rule users */
	atomic_t ipoib_rsm_usr_num;
drivers/infiniband/hw/hfi1/ipoib.h  +18 −0
@@ -22,6 +22,7 @@

#include "hfi.h"
#include "iowait.h"
#include "netdev.h"

#include <rdma/ib_verbs.h>

@@ -29,6 +30,7 @@

#define HFI1_IPOIB_TXREQ_NAME_LEN   32

#define HFI1_IPOIB_PSEUDO_LEN 20
#define HFI1_IPOIB_ENCAP_LEN 4

struct hfi1_ipoib_dev_priv;
@@ -118,6 +120,19 @@ hfi1_ipoib_priv(const struct net_device *dev)
	return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
}

static inline void
hfi1_ipoib_update_rx_netstats(struct hfi1_ipoib_dev_priv *priv,
			      u64 packets,
			      u64 bytes)
{
	struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);

	u64_stats_update_begin(&netstats->syncp);
	netstats->rx_packets += packets;
	netstats->rx_bytes += bytes;
	u64_stats_update_end(&netstats->syncp);
}

static inline void
hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
			      u64 packets,
@@ -142,6 +157,9 @@ void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
void hfi1_ipoib_napi_tx_disable(struct net_device *dev);

struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
				       int size, void *data);

int hfi1_ipoib_rn_get_params(struct ib_device *device,
			     u8 port_num,
			     enum rdma_netdev_t type,
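
The per-CPU counters updated by hfi1_ipoib_update_rx_netstats() above are
typically folded into the device statistics under the u64_stats seqcount.
A minimal reader-side sketch (illustration only; the function name is a
placeholder and the real ndo_get_stats64 handler is not part of this
patch):

	static void hfi1_ipoib_example_get_stats64(struct net_device *dev,
						   struct rtnl_link_stats64 *stats)
	{
		struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
		int cpu;

		for_each_possible_cpu(cpu) {
			const struct pcpu_sw_netstats *ns =
				per_cpu_ptr(priv->netstats, cpu);
			unsigned int start;
			u64 packets, bytes;

			do {
				start = u64_stats_fetch_begin_irq(&ns->syncp);
				packets = ns->rx_packets;
				bytes = ns->rx_bytes;
			} while (u64_stats_fetch_retry_irq(&ns->syncp, start));

			stats->rx_packets += packets;
			stats->rx_bytes += bytes;
		}
	}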
drivers/infiniband/hw/hfi1/ipoib_rx.c  +71 −0
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2020 Intel Corporation.
 *
 */

#include "netdev.h"
#include "ipoib.h"

#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))

static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
{
	void *dst_data;

	skb_checksum_none_assert(skb);
	skb->protocol = *((__be16 *)data);

	dst_data = skb_put(skb, size);
	memcpy(dst_data, data, size);
	skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
	skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
}

static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
{
	struct sk_buff *skb;
	int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
	void *frag;

	skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	skb_size = SKB_DATA_ALIGN(skb_size);
	frag = napi_alloc_frag(skb_size);

	if (unlikely(!frag))
		return napi_alloc_skb(napi, size);

	skb = build_skb(frag, skb_size);

	if (unlikely(!skb)) {
		skb_free_frag(frag);
		return NULL;
	}

	skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
	return skb;
}

struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
				       int size, void *data)
{
	struct napi_struct *napi = &rxq->napi;
	int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
	struct sk_buff *skb;

	/*
	 * For smaller allocations (up to one page including skb overhead),
	 * use the napi skb cache.  Otherwise try the napi frag cache.
	 */
	if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
		skb = napi_alloc_skb(napi, skb_size);
	else
		skb = prepare_frag_skb(napi, skb_size);

	if (unlikely(!skb))
		return NULL;

	copy_ipoib_buf(skb, data, size);

	return skb;
}
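
For orientation, a hypothetical sizing walk-through of the frag path
above (illustration only; it assumes PAGE_SIZE = 4096, NET_SKB_PAD = 32,
NET_IP_ALIGN = 2 and 64-byte SKB_DATA_ALIGN rounding, all of which are
architecture and config dependent):

	/*
	 * A payload of size = 8192 exceeds SKB_WITH_OVERHEAD(PAGE_SIZE), so
	 * hfi1_ipoib_prepare_skb() calls prepare_frag_skb(napi, 8196)
	 * (payload + HFI1_IPOIB_ENCAP_LEN).  There:
	 *   8196 + HFI1_IPOIB_SKB_PAD (34)  = 8230
	 *   SKB_DATA_ALIGN(8230)            = 8256
	 *   + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), re-aligned,
	 * is requested from napi_alloc_frag(); build_skb() then wraps the
	 * frag and skb_reserve() restores the 34-byte headroom.
	 */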