Commit d99dc602 authored by Gary Leshner's avatar Gary Leshner Committed by Jason Gunthorpe
Browse files

IB/hfi1: Add functions to transmit datagram ipoib packets

This patch implements the mechanism to accelerate the transmit side of
a multiple transmit queue RDMA netdev by submitting the packets to
the SDMA engine directly instead of sending through the verbs layer.

This patch also changes the UD/SEND_ONLY op to output the entropy value
in byte 0 of deth[1]. UD/SEND_ONLY_WITH_IMMEDIATE uses the previous
behavior with no entropy value being output.

The code in the ipoib rdma netdev which submits tx requests upon
successful submission will call trace_sdma_output_ibhdr to output
the ibhdr to the trace buffer.

Link: https://lore.kernel.org/r/20200511160548.173205.45616.stgit@awfm-01.aw.intel.com


Reviewed-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarGary Leshner <Gary.S.Leshner@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent fe810b50
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ hfi1-y := \
	init.o \
	intr.o \
	iowait.o \
	ipoib_tx.o \
	mad.o \
	mmu_rb.o \
	msix.o \
+145 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
 * Copyright(c) 2020 Intel Corporation.
 *
 */

/*
 * This file contains HFI1 support for IPOIB functionality
 */

#ifndef HFI1_IPOIB_H
#define HFI1_IPOIB_H

#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/atomic.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/if_infiniband.h>

#include "hfi.h"
#include "iowait.h"

#include <rdma/ib_verbs.h>

#define HFI1_IPOIB_ENTROPY_SHIFT   24

#define HFI1_IPOIB_TXREQ_NAME_LEN   32

#define HFI1_IPOIB_ENCAP_LEN 4

struct hfi1_ipoib_dev_priv;

union hfi1_ipoib_flow {
	u16 as_int;
	struct {
		u8 tx_queue;
		u8 sc5;
	} __attribute__((__packed__));
};

/**
 * struct hfi1_ipoib_circ_buf - List of items to be processed
 * @items: ring of items
 * @head: ring head
 * @tail: ring tail
 * @max_items: max items + 1 that the ring can contain
 * @producer_lock: producer sync lock
 * @consumer_lock: consumer sync lock
 */
struct hfi1_ipoib_circ_buf {
	void **items;
	unsigned long head;
	unsigned long tail;
	unsigned long max_items;
	spinlock_t producer_lock; /* head sync lock */
	spinlock_t consumer_lock; /* tail sync lock */
};

/**
 * struct hfi1_ipoib_txq - IPOIB per Tx queue information
 * @priv: private pointer
 * @sde: sdma engine
 * @tx_list: tx request list
 * @sent_txreqs: count of txreqs posted to sdma
 * @flow: tracks when list needs to be flushed for a flow change
 * @q_idx: ipoib Tx queue index
 * @pkts_sent: indicator packets have been sent from this queue
 * @wait: iowait structure
 * @complete_txreqs: count of txreqs completed by sdma
 * @napi: pointer to tx napi interface
 * @tx_ring: ring of ipoib txreqs to be reaped by napi callback
 */
struct hfi1_ipoib_txq {
	struct hfi1_ipoib_dev_priv *priv;
	struct sdma_engine *sde;
	struct list_head tx_list;
	u64 sent_txreqs;
	union hfi1_ipoib_flow flow;
	u8 q_idx;
	bool pkts_sent;
	struct iowait wait;

	atomic64_t ____cacheline_aligned_in_smp complete_txreqs;
	struct napi_struct *napi;
	struct hfi1_ipoib_circ_buf tx_ring;
};

struct hfi1_ipoib_dev_priv {
	struct hfi1_devdata *dd;
	struct net_device   *netdev;
	struct ib_device    *device;
	struct hfi1_ipoib_txq *txqs;
	struct kmem_cache *txreq_cache;
	struct napi_struct *tx_napis;
	u16 pkey;
	u16 pkey_index;
	u32 qkey;
	u8 port_num;

	const struct net_device_ops *netdev_ops;
	struct rvt_qp *qp;
	struct pcpu_sw_netstats __percpu *netstats;
};

/* hfi1 ipoib rdma netdev's private data structure */
struct hfi1_ipoib_rdma_netdev {
	struct rdma_netdev rn;  /* keep this first */
	/* followed by device private data */
	struct hfi1_ipoib_dev_priv dev_priv;
};

static inline struct hfi1_ipoib_dev_priv *
hfi1_ipoib_priv(const struct net_device *dev)
{
	return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
}

static inline void
hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
			      u64 packets,
			      u64 bytes)
{
	struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);

	u64_stats_update_begin(&netstats->syncp);
	netstats->tx_packets += packets;
	netstats->tx_bytes += bytes;
	u64_stats_update_end(&netstats->syncp);
}

int hfi1_ipoib_send_dma(struct net_device *dev,
			struct sk_buff *skb,
			struct ib_ah *address,
			u32 dqpn);

int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv);
void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);

void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
void hfi1_ipoib_napi_tx_disable(struct net_device *dev);

#endif /* _IPOIB_H */
+828 −0

File added.

Preview size limit exceeded, changes collapsed.

+9 −1
Original line number Diff line number Diff line
/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 * Copyright(c) 2015 - 2020 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
@@ -47,6 +47,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "exp_rcv.h"
#include "ipoib.h"

static u8 __get_ib_hdr_len(struct ib_header *hdr)
{
@@ -126,6 +127,7 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x"
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
@@ -444,6 +446,12 @@ const char *parse_everbs_hdrs(
		break;
	/* deth */
	case OP(UD, SEND_ONLY):
		trace_seq_printf(p, DETH_ENTROPY_PRN,
				 be32_to_cpu(eh->ud.deth[0]),
				 be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK,
				 be32_to_cpu(eh->ud.deth[1]) >>
					     HFI1_IPOIB_ENTROPY_SHIFT);
		break;
	case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
		trace_seq_printf(p, DETH_PRN,
				 be32_to_cpu(eh->ud.deth[0]),
+1 −0
Original line number Diff line number Diff line
@@ -2205,6 +2205,7 @@ struct rdma_netdev {
	void              *clnt_priv;
	struct ib_device  *hca;
	u8                 port_num;
	int                mtu;

	/*
	 * cleanup function must be specified.