Commit 176bd861 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'rds-ipv6'



Ka-Cheong Poon says:

====================
rds: IPv6 support

This patch set adds IPv6 support to the kernel RDS and related
modules.  Existing RDS apps using IPv4 address continue to run without
any problem.  New RDS apps which want to use IPv6 address can do so by
passing the address in struct sockaddr_in6 to bind(), connect() or
sendmsg().  And those apps also need to use the new IPv6 equivalents
of some of the existing socket options as the existing options use a
32 bit integer to store IP address.

All RDS code now use struct in6_addr to store IP address.  IPv4
address is stored as an IPv4 mapped address.

Header file changes

There are many data structures (RDS socket options) used by RDS apps
which use a 32 bit integer to store IP address. To support IPv6,
struct in6_addr needs to be used. To ensure backward compatibility, a
new data structure is introduced for each of those data structures
which use a 32 bit integer to represent an IP address. And new socket
options are introduced to use those new structures. This means that
existing apps should work without a problem with the new RDS module.
For apps which want to use IPv6, those new data structures and socket
options can be used. IPv4 mapped address is used to represent IPv4
address in the new data structures.

Internally, all RDS data structures which contain an IP address are
changed to use struct in6_addr to store the address. IPv4 address is
stored as an IPv4 mapped address. All the functions which take an IP
address as argument are also changed to use struct in6_addr.

RDS/RDMA/IB uses a private data (struct rds_ib_connect_private)
exchange between endpoints at RDS connection establishment time to
support RDMA. This private data exchange uses a 32 bit integer to
represent an IP address. This needs to be changed in order to support
IPv6. A new private data struct rds6_ib_connect_private is introduced
to handle this. To ensure backward compatibility, an IPv6 capable RDS
stack uses another RDMA listener port (RDS_CM_PORT) to accept IPv6
connection. And it continues to use the original RDS_PORT for IPv4 RDS
connections. When it needs to communicate with an IPv6 peer, it uses
the RDS_TCP_PORT to send the connection set up request.

RDS/TCP changes

TCP related code is changed to support IPv6.  Note that only an IPv6
TCP listener on port RDS_TCP_PORT is created as it can accept both
IPv4 and IPv6 connection requests.

IB/RDMA changes

The initial private data exchange between IB endpoints using RDMA is
changed to support IPv6 address instead, if the peer address is IPv6.
To ensure backward compatibility, annother RDMA listener port
(RDS_CM_PORT) is used to accept IPv6 connection. An IPv6 capable RDS
module continues to use the original RDS_PORT for IPv4 RDS
connections. When it needs to communicate with an IPv6 peer, it uses
the RDS_CM_PORT to send the connection set up request.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents a6c90dd3 b7ff8b10
Loading
Loading
Loading
Loading
+67 −2
Original line number Diff line number Diff line
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
/*
 * Copyright (c) 2008 Oracle.  All rights reserved.
 * Copyright (c) 2008, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
@@ -118,7 +118,17 @@
#define RDS_INFO_IB_CONNECTIONS		10008
#define RDS_INFO_CONNECTION_STATS	10009
#define RDS_INFO_IWARP_CONNECTIONS	10010
#define RDS_INFO_LAST			10010

/* PF_RDS6 options */
#define RDS6_INFO_CONNECTIONS		10011
#define RDS6_INFO_SEND_MESSAGES		10012
#define RDS6_INFO_RETRANS_MESSAGES	10013
#define RDS6_INFO_RECV_MESSAGES		10014
#define RDS6_INFO_SOCKETS		10015
#define RDS6_INFO_TCP_SOCKETS		10016
#define RDS6_INFO_IB_CONNECTIONS	10017

#define RDS_INFO_LAST			10017

struct rds_info_counter {
	__u8	name[32];
@@ -140,6 +150,15 @@ struct rds_info_connection {
	__u8		flags;
} __attribute__((packed));

struct rds6_info_connection {
	__u64		next_tx_seq;
	__u64		next_rx_seq;
	struct in6_addr	laddr;
	struct in6_addr	faddr;
	__u8		transport[TRANSNAMSIZ];		/* null term ascii */
	__u8		flags;
} __attribute__((packed));

#define RDS_INFO_MESSAGE_FLAG_ACK               0x01
#define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02

@@ -153,6 +172,17 @@ struct rds_info_message {
	__u8		flags;
} __attribute__((packed));

struct rds6_info_message {
	__u64	seq;
	__u32	len;
	struct in6_addr	laddr;
	struct in6_addr	faddr;
	__be16		lport;
	__be16		fport;
	__u8		flags;
	__u8		tos;
} __attribute__((packed));

struct rds_info_socket {
	__u32		sndbuf;
	__be32		bound_addr;
@@ -163,6 +193,16 @@ struct rds_info_socket {
	__u64		inum;
} __attribute__((packed));

struct rds6_info_socket {
	__u32		sndbuf;
	struct in6_addr	bound_addr;
	struct in6_addr	connected_addr;
	__be16		bound_port;
	__be16		connected_port;
	__u32		rcvbuf;
	__u64		inum;
} __attribute__((packed));

struct rds_info_tcp_socket {
	__be32          local_addr;
	__be16          local_port;
@@ -175,6 +215,18 @@ struct rds_info_tcp_socket {
	__u32           last_seen_una;
} __attribute__((packed));

struct rds6_info_tcp_socket {
	struct in6_addr	local_addr;
	__be16		local_port;
	struct in6_addr	peer_addr;
	__be16		peer_port;
	__u64		hdr_rem;
	__u64		data_rem;
	__u32		last_sent_nxt;
	__u32		last_expected_una;
	__u32		last_seen_una;
} __attribute__((packed));

#define RDS_IB_GID_LEN	16
struct rds_info_rdma_connection {
	__be32		src_addr;
@@ -189,6 +241,19 @@ struct rds_info_rdma_connection {
	__u32		rdma_mr_size;
};

struct rds6_info_rdma_connection {
	struct in6_addr	src_addr;
	struct in6_addr	dst_addr;
	__u8		src_gid[RDS_IB_GID_LEN];
	__u8		dst_gid[RDS_IB_GID_LEN];

	__u32		max_send_wr;
	__u32		max_recv_wr;
	__u32		max_send_sge;
	__u32		rdma_mr_max;
	__u32		rdma_mr_size;
};

/* RDS message Receive Path Latency points */
enum rds_message_rxpath_latency {
	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
+162 −39
Original line number Diff line number Diff line
/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
@@ -35,6 +35,7 @@
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/poll.h>
#include <net/sock.h>

@@ -113,28 +114,82 @@ void rds_wake_sk_sleep(struct rds_sock *rs)
static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
		       int peer)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
	struct rds_sock *rs = rds_sk_to_rs(sock->sk);

	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int uaddr_len;

	/* racey, don't care */
	if (peer) {
		if (!rs->rs_conn_addr)
		if (ipv6_addr_any(&rs->rs_conn_addr))
			return -ENOTCONN;

		if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) {
			sin = (struct sockaddr_in *)uaddr;
			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
			sin->sin_family = AF_INET;
			sin->sin_port = rs->rs_conn_port;
		sin->sin_addr.s_addr = rs->rs_conn_addr;
			sin->sin_addr.s_addr = rs->rs_conn_addr_v4;
			uaddr_len = sizeof(*sin);
		} else {
		sin->sin_port = rs->rs_bound_port;
		sin->sin_addr.s_addr = rs->rs_bound_addr;
			sin6 = (struct sockaddr_in6 *)uaddr;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = rs->rs_conn_port;
			sin6->sin6_addr = rs->rs_conn_addr;
			sin6->sin6_flowinfo = 0;
			/* scope_id is the same as in the bound address. */
			sin6->sin6_scope_id = rs->rs_bound_scope_id;
			uaddr_len = sizeof(*sin6);
		}
	} else {
		/* If socket is not yet bound and the socket is connected,
		 * set the return address family to be the same as the
		 * connected address, but with 0 address value.  If it is not
		 * connected, set the family to be AF_UNSPEC (value 0) and
		 * the address size to be that of an IPv4 address.
		 */
		if (ipv6_addr_any(&rs->rs_bound_addr)) {
			if (ipv6_addr_any(&rs->rs_conn_addr)) {
				sin = (struct sockaddr_in *)uaddr;
				memset(sin, 0, sizeof(*sin));
				sin->sin_family = AF_UNSPEC;
				return sizeof(*sin);
			}

			if (ipv6_addr_type(&rs->rs_conn_addr) &
			    IPV6_ADDR_MAPPED) {
				sin = (struct sockaddr_in *)uaddr;
				memset(sin, 0, sizeof(*sin));
				sin->sin_family = AF_INET;

				return sizeof(*sin);
			}

			sin6 = (struct sockaddr_in6 *)uaddr;
			memset(sin6, 0, sizeof(*sin6));
			sin6->sin6_family = AF_INET6;
			return sizeof(*sin6);
		}
		if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
			sin = (struct sockaddr_in *)uaddr;
			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
			sin->sin_family = AF_INET;
			sin->sin_port = rs->rs_bound_port;
			sin->sin_addr.s_addr = rs->rs_bound_addr_v4;
			uaddr_len = sizeof(*sin);
		} else {
			sin6 = (struct sockaddr_in6 *)uaddr;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = rs->rs_bound_port;
			sin6->sin6_addr = rs->rs_bound_addr;
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = rs->rs_bound_scope_id;
			uaddr_len = sizeof(*sin6);
		}
	}

	return uaddr_len;
}

/*
 * RDS' poll is without a doubt the least intuitive part of the interface,
 * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from
@@ -203,11 +258,12 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
			      int len)
{
	struct sockaddr_in6 sin6;
	struct sockaddr_in sin;
	int ret = 0;

	/* racing with another thread binding seems ok here */
	if (rs->rs_bound_addr == 0) {
	if (ipv6_addr_any(&rs->rs_bound_addr)) {
		ret = -ENOTCONN; /* XXX not a great errno */
		goto out;
	}
@@ -215,14 +271,23 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
	if (len < sizeof(struct sockaddr_in)) {
		ret = -EINVAL;
		goto out;
	} else if (len < sizeof(struct sockaddr_in6)) {
		/* Assume IPv4 */
		if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) {
			ret = -EFAULT;
			goto out;
		}

	if (copy_from_user(&sin, optval, sizeof(sin))) {
		ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr);
		sin6.sin6_port = sin.sin_port;
	} else {
		if (copy_from_user(&sin6, optval,
				   sizeof(struct sockaddr_in6))) {
			ret = -EFAULT;
			goto out;
		}
	}

	rds_send_drop_to(rs, &sin);
	rds_send_drop_to(rs, &sin6);
out:
	return ret;
}
@@ -435,31 +500,87 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
		       int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct rds_sock *rs = rds_sk_to_rs(sk);
	int addr_type;
	int ret = 0;

	lock_sock(sk);

	if (addr_len != sizeof(struct sockaddr_in)) {
	switch (uaddr->sa_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)uaddr;
		if (addr_len < sizeof(struct sockaddr_in)) {
			ret = -EINVAL;
		goto out;
			break;
		}
		if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
			ret = -EDESTADDRREQ;
			break;
		}
		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
		    sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
			ret = -EINVAL;
			break;
		}
		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr);
		rs->rs_conn_port = sin->sin_port;
		break;

	if (sin->sin_family != AF_INET) {
		ret = -EAFNOSUPPORT;
		goto out;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)uaddr;
		if (addr_len < sizeof(struct sockaddr_in6)) {
			ret = -EINVAL;
			break;
		}
		addr_type = ipv6_addr_type(&sin6->sin6_addr);
		if (!(addr_type & IPV6_ADDR_UNICAST)) {
			__be32 addr4;

	if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
		ret = -EDESTADDRREQ;
		goto out;
			if (!(addr_type & IPV6_ADDR_MAPPED)) {
				ret = -EPROTOTYPE;
				break;
			}

	rs->rs_conn_addr = sin->sin_addr.s_addr;
	rs->rs_conn_port = sin->sin_port;
			/* It is a mapped address.  Need to do some sanity
			 * checks.
			 */
			addr4 = sin6->sin6_addr.s6_addr32[3];
			if (addr4 == htonl(INADDR_ANY) ||
			    addr4 == htonl(INADDR_BROADCAST) ||
			    IN_MULTICAST(ntohl(addr4))) {
				ret = -EPROTOTYPE;
				break;
			}
		}

		if (addr_type & IPV6_ADDR_LINKLOCAL) {
			/* If socket is arleady bound to a link local address,
			 * the peer address must be on the same link.
			 */
			if (sin6->sin6_scope_id == 0 ||
			    (!ipv6_addr_any(&rs->rs_bound_addr) &&
			     rs->rs_bound_scope_id &&
			     sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
				ret = -EINVAL;
				break;
			}
			/* Remember the connected address scope ID.  It will
			 * be checked against the binding local address when
			 * the socket is bound.
			 */
			rs->rs_bound_scope_id = sin6->sin6_scope_id;
		}
		rs->rs_conn_addr = sin6->sin6_addr;
		rs->rs_conn_port = sin6->sin6_port;
		break;

	default:
		ret = -EAFNOSUPPORT;
		break;
	}

out:
	release_sock(sk);
	return ret;
}
@@ -578,8 +699,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
		list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
			total++;
			if (total <= len)
				rds_inc_info_copy(inc, iter, inc->i_saddr,
						  rs->rs_bound_addr, 1);
				rds_inc_info_copy(inc, iter,
						  inc->i_saddr.s6_addr32[3],
						  rs->rs_bound_addr_v4,
						  1);
		}

		read_unlock(&rs->rs_recv_lock);
@@ -608,8 +731,8 @@ static void rds_sock_info(struct socket *sock, unsigned int len,
	list_for_each_entry(rs, &rds_sock_list, rs_item) {
		sinfo.sndbuf = rds_sk_sndbuf(rs);
		sinfo.rcvbuf = rds_sk_rcvbuf(rs);
		sinfo.bound_addr = rs->rs_bound_addr;
		sinfo.connected_addr = rs->rs_conn_addr;
		sinfo.bound_addr = rs->rs_bound_addr_v4;
		sinfo.connected_addr = rs->rs_conn_addr_v4;
		sinfo.bound_port = rs->rs_bound_port;
		sinfo.connected_port = rs->rs_conn_port;
		sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
+106 −30
Original line number Diff line number Diff line
/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <net/sock.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/if_arp.h>
#include <linux/jhash.h>
#include <linux/ratelimit.h>
@@ -42,42 +43,58 @@ static struct rhashtable bind_hash_table;

static const struct rhashtable_params ht_parms = {
	.nelem_hint = 768,
	.key_len = sizeof(u64),
	.key_len = RDS_BOUND_KEY_LEN,
	.key_offset = offsetof(struct rds_sock, rs_bound_key),
	.head_offset = offsetof(struct rds_sock, rs_bound_node),
	.max_size = 16384,
	.min_size = 1024,
};

/* Create a key for the bind hash table manipulation.  Port is in network byte
 * order.
 */
static inline void __rds_create_bind_key(u8 *key, const struct in6_addr *addr,
					 __be16 port, __u32 scope_id)
{
	memcpy(key, addr, sizeof(*addr));
	key += sizeof(*addr);
	memcpy(key, &port, sizeof(port));
	key += sizeof(port);
	memcpy(key, &scope_id, sizeof(scope_id));
}

/*
 * Return the rds_sock bound at the given local address.
 *
 * The rx path can race with rds_release.  We notice if rds_release() has
 * marked this socket and don't return a rs ref to the rx path.
 */
struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
				__u32 scope_id)
{
	u64 key = ((u64)addr << 32) | port;
	u8 key[RDS_BOUND_KEY_LEN];
	struct rds_sock *rs;

	rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms);
	__rds_create_bind_key(key, addr, port, scope_id);
	rs = rhashtable_lookup_fast(&bind_hash_table, key, ht_parms);
	if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
		rds_sock_addref(rs);
	else
		rs = NULL;

	rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
	rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
		 ntohs(port));

	return rs;
}

/* returns -ve errno or +ve port */
static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
			 __be16 *port, __u32 scope_id)
{
	int ret = -EADDRINUSE;
	u16 rover, last;
	u64 key;
	u8 key[RDS_BOUND_KEY_LEN];

	if (*port != 0) {
		rover = be16_to_cpu(*port);
@@ -95,12 +112,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)

		if (rover == RDS_FLAG_PROBE_PORT)
			continue;
		key = ((u64)addr << 32) | cpu_to_be16(rover);
		if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms))
		__rds_create_bind_key(key, addr, cpu_to_be16(rover),
				      scope_id);
		if (rhashtable_lookup_fast(&bind_hash_table, key, ht_parms))
			continue;

		rs->rs_bound_key = key;
		rs->rs_bound_addr = addr;
		memcpy(rs->rs_bound_key, key, sizeof(rs->rs_bound_key));
		rs->rs_bound_addr = *addr;
		net_get_random_once(&rs->rs_hash_initval,
				    sizeof(rs->rs_hash_initval));
		rs->rs_bound_port = cpu_to_be16(rover);
@@ -109,12 +127,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
		if (!rhashtable_insert_fast(&bind_hash_table,
					    &rs->rs_bound_node, ht_parms)) {
			*port = rs->rs_bound_port;
			rs->rs_bound_scope_id = scope_id;
			ret = 0;
			rdsdebug("rs %p binding to %pI4:%d\n",
			  rs, &addr, (int)ntohs(*port));
			rdsdebug("rs %p binding to %pI6c:%d\n",
				 rs, addr, (int)ntohs(*port));
			break;
		} else {
			rs->rs_bound_addr = 0;
			rs->rs_bound_addr = in6addr_any;
			rds_sock_put(rs);
			ret = -ENOMEM;
			break;
@@ -127,44 +146,101 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
void rds_remove_bound(struct rds_sock *rs)
{

	if (!rs->rs_bound_addr)
	if (ipv6_addr_any(&rs->rs_bound_addr))
		return;

	rdsdebug("rs %p unbinding from %pI4:%d\n",
	rdsdebug("rs %p unbinding from %pI6c:%d\n",
		 rs, &rs->rs_bound_addr,
		 ntohs(rs->rs_bound_port));

	rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms);
	rds_sock_put(rs);
	rs->rs_bound_addr = 0;
	rs->rs_bound_addr = in6addr_any;
}

int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
	struct rds_sock *rs = rds_sk_to_rs(sk);
	struct in6_addr v6addr, *binding_addr;
	struct rds_transport *trans;
	__u32 scope_id = 0;
	int addr_type;
	int ret = 0;
	__be16 port;

	/* We allow an RDS socket to be bound to either IPv4 or IPv6
	 * address.
	 */
	if (uaddr->sa_family == AF_INET) {
		struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

		if (addr_len < sizeof(struct sockaddr_in) ||
		    sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
		    sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
		    IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
			return -EINVAL;
		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
		binding_addr = &v6addr;
		port = sin->sin_port;
	} else if (uaddr->sa_family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;

		if (addr_len < sizeof(struct sockaddr_in6))
			return -EINVAL;
		addr_type = ipv6_addr_type(&sin6->sin6_addr);
		if (!(addr_type & IPV6_ADDR_UNICAST)) {
			__be32 addr4;

			if (!(addr_type & IPV6_ADDR_MAPPED))
				return -EINVAL;

			/* It is a mapped address.  Need to do some sanity
			 * checks.
			 */
			addr4 = sin6->sin6_addr.s6_addr32[3];
			if (addr4 == htonl(INADDR_ANY) ||
			    addr4 == htonl(INADDR_BROADCAST) ||
			    IN_MULTICAST(ntohl(addr4)))
				return -EINVAL;
		}
		/* The scope ID must be specified for link local address. */
		if (addr_type & IPV6_ADDR_LINKLOCAL) {
			if (sin6->sin6_scope_id == 0)
				return -EINVAL;
			scope_id = sin6->sin6_scope_id;
		}
		binding_addr = &sin6->sin6_addr;
		port = sin6->sin6_port;
	} else {
		return -EINVAL;
	}
	lock_sock(sk);

	if (addr_len != sizeof(struct sockaddr_in) ||
	    sin->sin_family != AF_INET ||
	    rs->rs_bound_addr ||
	    sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
	/* RDS socket does not allow re-binding. */
	if (!ipv6_addr_any(&rs->rs_bound_addr)) {
		ret = -EINVAL;
		goto out;
	}
	/* Socket is connected.  The binding address should have the same
	 * scope ID as the connected address, except the case when one is
	 * non-link local address (scope_id is 0).
	 */
	if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id &&
	    rs->rs_bound_scope_id &&
	    scope_id != rs->rs_bound_scope_id) {
		ret = -EINVAL;
		goto out;
	}

	ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
	ret = rds_add_bound(rs, binding_addr, &port, scope_id);
	if (ret)
		goto out;

	if (rs->rs_transport) { /* previously bound */
		trans = rs->rs_transport;
		if (trans->laddr_check(sock_net(sock->sk),
				       sin->sin_addr.s_addr) != 0) {
				       binding_addr, scope_id) != 0) {
			ret = -ENOPROTOOPT;
			rds_remove_bound(rs);
		} else {
@@ -172,13 +248,13 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
		}
		goto out;
	}
	trans = rds_trans_get_preferred(sock_net(sock->sk),
					sin->sin_addr.s_addr);
	trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr,
					scope_id);
	if (!trans) {
		ret = -EADDRNOTAVAIL;
		rds_remove_bound(rs);
		pr_info_ratelimited("RDS: %s could not find a transport for %pI4, load rds_tcp or rds_rdma?\n",
				    __func__, &sin->sin_addr.s_addr);
		pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n",
				    __func__, binding_addr);
		goto out;
	}

+13 −10
Original line number Diff line number Diff line
/*
 * Copyright (c) 2007 Oracle.  All rights reserved.
 * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
@@ -101,7 +101,7 @@ static DEFINE_RWLOCK(rds_cong_monitor_lock);
static DEFINE_SPINLOCK(rds_cong_lock);
static struct rb_root rds_cong_tree = RB_ROOT;

static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
static struct rds_cong_map *rds_cong_tree_walk(const struct in6_addr *addr,
					       struct rds_cong_map *insert)
{
	struct rb_node **p = &rds_cong_tree.rb_node;
@@ -109,12 +109,15 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
	struct rds_cong_map *map;

	while (*p) {
		int diff;

		parent = *p;
		map = rb_entry(parent, struct rds_cong_map, m_rb_node);

		if (addr < map->m_addr)
		diff = rds_addr_cmp(addr, &map->m_addr);
		if (diff < 0)
			p = &(*p)->rb_left;
		else if (addr > map->m_addr)
		else if (diff > 0)
			p = &(*p)->rb_right;
		else
			return map;
@@ -132,7 +135,7 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
 * these bitmaps in the process getting pointers to them.  The bitmaps are only
 * ever freed as the module is removed after all connections have been freed.
 */
static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
static struct rds_cong_map *rds_cong_from_addr(const struct in6_addr *addr)
{
	struct rds_cong_map *map;
	struct rds_cong_map *ret = NULL;
@@ -144,7 +147,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
	if (!map)
		return NULL;

	map->m_addr = addr;
	map->m_addr = *addr;
	init_waitqueue_head(&map->m_waitq);
	INIT_LIST_HEAD(&map->m_conn_list);

@@ -171,7 +174,7 @@ out:
		kfree(map);
	}

	rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));
	rdsdebug("map %p for addr %pI6c\n", ret, addr);

	return ret;
}
@@ -202,8 +205,8 @@ void rds_cong_remove_conn(struct rds_connection *conn)

int rds_cong_get_maps(struct rds_connection *conn)
{
	conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
	conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
	conn->c_lcong = rds_cong_from_addr(&conn->c_laddr);
	conn->c_fcong = rds_cong_from_addr(&conn->c_faddr);

	if (!(conn->c_lcong && conn->c_fcong))
		return -ENOMEM;
@@ -353,7 +356,7 @@ void rds_cong_remove_socket(struct rds_sock *rs)

	/* update congestion map for now-closed port */
	spin_lock_irqsave(&rds_cong_lock, flags);
	map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
	map = rds_cong_tree_walk(&rs->rs_bound_addr, NULL);
	spin_unlock_irqrestore(&rds_cong_lock, flags);

	if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
+199 −60

File changed.

Preview size limit exceeded, changes collapsed.

Loading