Commit 45f01fb3 authored by Jan Moskyto Matejka
Browse files

Netlink: MPLS routes in kernel, not fully working yet.

TODO:
- fix static to detect MPLS stack changes on reload
- fix MPLS_ENCAP parsing on route scan

Anyway, BIRD is now capable of inserting both MPLS routes and MPLS encap
routes into the kernel.

Among other things, it was necessary to define the platform-specific AF_MPLS
constant as 28, which is the value assigned to it in the Linux kernel.

There is no BSD support yet; it may be added in the future.
parent 21d358f0
Loading
Loading
Loading
Loading
+43 −0
Original line number Diff line number Diff line
@@ -11,10 +11,53 @@
#define _BIRD_MPLS_H_

#define MPLS_STACK_LENGTH   8 /* Adjust this if you need deeper MPLS stack */
#define MPLS_PXLEN	    20 /* Length of the label in bits. Constant. */

/*
 *   RFC 3032 updated by RFC 5462:
 *
 *    0                   1                   2                   3
 *    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Label
 *   |                Label                  | TC  |S|       TTL     | Stack
 *   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Entry
 *
 *                       Label:  Label Value, 20 bits
 *                       TC:     Traffic Class, 3 bits
 *                       S:      Bottom of Stack, 1 bit
 *                       TTL:    Time to Live, 8 bits
 */

/*
 * A decoded MPLS label stack. Only label[0] .. label[len-1] are meaningful;
 * the array capacity is fixed at MPLS_STACK_LENGTH entries. The wire
 * encoding of these values is produced by mpls_hton().
 */
typedef struct mpls_stack {
  u8 len;	/* Number of labels currently stored in label[] */
  u32 label[MPLS_STACK_LENGTH];	/* Label values, label[0] first */
} mpls_stack;

/*
 * mpls_hton - encode a label stack into its 4-byte-per-entry wire format
 * @s: label stack to encode
 *
 * Each entry is written as 20 bits of label, TC = 0, the Bottom of Stack
 * bit (set only on the last entry) and TTL = 0.
 *
 * Returns a pointer to a function-local static buffer. The contents are
 * overwritten by the next call, so the result must be consumed before
 * mpls_hton() is called again; the function is not reentrant.
 *
 * At most MPLS_STACK_LENGTH entries are encoded; a larger @s.len is clamped
 * so that the static buffer can never be overrun. When @s.len is zero
 * nothing is written and the buffer keeps whatever it held before.
 */
static inline char * const mpls_hton(mpls_stack s) {
  static char buf[MPLS_STACK_LENGTH*4];
  int count = (s.len < MPLS_STACK_LENGTH) ? s.len : MPLS_STACK_LENGTH;
  int i;

  for (i = 0; i < count; i++) {
    u32 label = s.label[i];

    /* Top 8, middle 8 and low 4 bits of the 20-bit label */
    buf[i*4 + 0] = (char) ((label >> 12) & 0xff);
    buf[i*4 + 1] = (char) ((label >> 4) & 0xff);
    /* Low nibble: TC stays zero, bit 0 is Bottom of Stack */
    buf[i*4 + 2] = (char) (((label << 4) & 0xf0) | ((i == count - 1) ? 0x1 : 0));
    /* TTL */
    buf[i*4 + 3] = 0;
  }
  return buf;
}

/*
 * mpls_buflen - length in bytes of an encoded MPLS label stack
 * @buf: wire-format label stack, 4 bytes per entry
 *
 * Walks the entries until one with the Bottom of Stack bit (bit 0 of the
 * third byte) set to 1 is found and returns the number of bytes up to and
 * including that entry. The scan is not bounded: the caller must guarantee
 * that @buf really contains an entry with the Bottom of Stack bit set.
 */
static inline int mpls_buflen(const char *buf) {
  int entries = 0;

  for (;;) {
    char flags = buf[entries*4 + 2];

    entries++;
    if (flags & 0x1)
      break;
  }

  return entries*4;
}

static inline mpls_stack mpls_ntoh(const char *buf) {
  mpls_stack s = { .len = mpls_buflen(buf) };
  int i;
  for (i = 0; i < s.len; i++)
    s.label[i] = (buf[i*4 + 0] << 12) | (buf[i*4 + 1] << 4) | (buf[i*4 + 2] >> 4);
  return s;
}

#endif
+14 −1
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#define _BIRD_NET_H_

#include "lib/ip.h"
#include "lib/mpls.h"


#define NET_IP4		1
@@ -134,7 +135,7 @@ extern const u16 net_max_text_length[];
  ((net_addr_roa6) { NET_ROA6, pxlen, sizeof(net_addr_roa6), prefix, max_pxlen, asn })

#define NET_ADDR_MPLS(label) \
  ((net_addr_mpls) { NET_MPLS, 0, sizeof(net_addr_mpls), label })
  ((net_addr_mpls) { NET_MPLS, MPLS_PXLEN, sizeof(net_addr_mpls), label })


static inline void net_fill_ip4(net_addr *a, ip4_addr prefix, uint pxlen)
@@ -211,6 +212,18 @@ static inline ip_addr net_prefix(const net_addr *a)
  }
}

static inline mpls_stack net_mpls(const net_addr *a)
{
  mpls_stack ms;
  if (a->type == NET_MPLS) {
    ms.len = 1;
    ms.label[0] = ((net_addr_mpls *) a)->label;
    return ms;
  }

  bug("Can't call net_mpls on non-mpls net_addr");
}

/* Return the pxlen field of the network address @a. */
static inline uint
net4_pxlen(const net_addr *a)
{
  return a->pxlen;
}

+4 −0
Original line number Diff line number Diff line
@@ -16,6 +16,10 @@

#define CONFIG_RESTRICTED_PRIVILEGES

/*
 * Address family number for MPLS. Defined here only when no earlier header
 * has provided AF_MPLS; 28 is the value assigned in the Linux kernel.
 */
#ifndef AF_MPLS
#define AF_MPLS 28
#endif

/*
Link: sysdep/linux
Link: sysdep/unix
+182 −15
Original line number Diff line number Diff line
@@ -26,10 +26,12 @@
#include "lib/socket.h"
#include "lib/string.h"
#include "lib/hash.h"
#include "lib/mpls.h"
#include "conf/conf.h"

#include <asm/types.h>
#include <linux/if.h>
#include <linux/lwtunnel.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

@@ -46,6 +48,25 @@
#define RTA_TABLE  15
#endif

/*
 * Fallback definitions of the route attribute codes and the address family
 * used for MPLS routes and MPLS encapsulation. Each one takes effect only
 * when the included headers have not defined the name already.
 * NOTE(review): the numeric values are presumably copied from the Linux
 * kernel headers -- confirm against linux/rtnetlink.h when updating.
 */
#ifndef RTA_VIA
#define RTA_VIA	 18
#endif

#ifndef RTA_NEWDST
#define RTA_NEWDST  19
#endif

#ifndef RTA_ENCAP_TYPE
#define RTA_ENCAP_TYPE	21
#endif

#ifndef RTA_ENCAP
#define RTA_ENCAP  22
#endif

#ifndef AF_MPLS
#define AF_MPLS	 28
#endif

/*
 *	Synchronous Netlink interface
@@ -256,7 +277,7 @@ static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
};


#define BIRD_RTA_MAX  (RTA_TABLE+1)
#define BIRD_RTA_MAX  (RTA_ENCAP+1)

static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip4_addr) },
@@ -272,6 +293,8 @@ static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
  [RTA_MULTIPATH] = { 1, 0, 0 },
  [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  [RTA_ENCAP]	  = { 1, 0, 0 },
};

static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
@@ -284,6 +307,20 @@ static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
  [RTA_METRICS]	  = { 1, 0, 0 },
  [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  [RTA_ENCAP]	  = { 1, 0, 0 },
};

/*
 * Route attributes accepted when parsing AF_MPLS route messages, indexed by
 * attribute code.
 * NOTE(review): the three initializer fields presumably mean
 * { attribute wanted, check size, expected size } as in the sibling IPv4 and
 * IPv6 tables -- confirm against the struct nl_want_attrs definition.
 * RTA_VIA and RTA_NEWDST are listed without a size, since their payload
 * length varies (address family dependent / one entry per label).
 */
static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
  [RTA_DST]	  = { 1, 1, sizeof(u32) },
  [RTA_IIF]	  = { 1, 1, sizeof(u32) },
  [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  [RTA_METRICS]	  = { 1, 0, 0 },
  [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  [RTA_VIA]	  = { 1, 0, 0 },
  [RTA_NEWDST]	  = { 1, 0, 0 },
};


@@ -333,6 +370,30 @@ static inline ip_addr rta_get_ipa(struct rtattr *a)
    return ipa_from_ip6(rta_get_ip6(a));
}

static inline ip_addr rta_get_via(struct rtattr *a)
{
  struct rtvia *v = RTA_DATA(a);
  switch(v->rtvia_family) {
    case AF_INET:  return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
    case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
  }
  return IPA_NONE;
}

static inline mpls_stack rta_get_mpls(struct rtattr *a)
{
  mpls_stack ms = { .len = RTA_PAYLOAD(a) / 4 };
  for (int i = 0; i < ms.len; i++) {
    ms.label[i] = (((u8 *)RTA_DATA(a))[i*4 + 0] << 12)
		| (((u8 *)RTA_DATA(a))[i*4 + 1] <<  4)
		| (((u8 *)RTA_DATA(a))[i*4 + 2] >>  4);

    if (!!(((u8 *)RTA_DATA(a))[i*4 + 2] & 1) != (i+1 == ms.len))
      log(L_WARN "KRT: Received a route with mismatched MPLS BoS bit, ignoring");
  }
  return ms;
}

struct rtattr *
nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
{
@@ -353,6 +414,24 @@ nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint
  return a;
}

/*
 * Start a nested attribute: append an attribute @code with no payload to the
 * message @h and return it, so that it can later be passed to
 * nl_close_attr().
 */
static inline struct rtattr *
nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
{
  struct rtattr *nest = nl_add_attr(h, bufsize, code, NULL, 0);

  return nest;
}

/*
 * Finish a nested attribute: set the length of @a to the distance from the
 * start of @a to the current end of the message @h (h->nlmsg_len bytes past
 * the message header).
 */
static inline void
nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
{
  char *msg_end = (char *) h + h->nlmsg_len;

  a->rta_len = msg_end - (char *) a;
}

/* Append attribute @code carrying the 16-bit value @data to the message @h. */
static inline void
nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
{
  u16 value = data;

  nl_add_attr(h, bufsize, code, &value, sizeof(value));
}

static inline void
nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
{
@@ -382,16 +461,46 @@ nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
    nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
}

static inline struct rtattr *
nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
static inline void
nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, mpls_stack ms)
{
  return nl_add_attr(h, bufsize, code, NULL, 0);
  const char *buf = mpls_hton(ms);
  nl_add_attr(h, bufsize, code, buf, mpls_buflen(buf));
}

static inline void
nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, mpls_stack ms)
{
  a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
  struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
  nl_add_attr_mpls(h, bufsize, RTA_DST, ms);
  nl_close_attr(h, nest);

  nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
}

/*
 * nl_add_attr_via - append an RTA_VIA attribute holding a gateway address
 * @h: netlink message being built
 * @bufsize: total size of the buffer holding @h
 * @ipa: gateway address; sent as AF_INET when it is an IPv4 address,
 *       as AF_INET6 otherwise
 *
 * The payload (struct rtvia followed by the address in network order) is
 * assembled in a local buffer and appended with nl_add_attr(), like every
 * other attribute. This way the payload is not written into the message
 * behind the back of the helper that receives @bufsize.
 * NOTE(review): this relies on nl_add_attr() enforcing @bufsize -- its body
 * is not visible here, confirm.
 */
static inline void
nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
{
  /* The union keeps the raw bytes suitably aligned for struct rtvia */
  union {
    struct rtvia via;
    u8 raw[sizeof(struct rtvia) + sizeof(ip6_addr)];
  } buf;
  uint len = sizeof(struct rtvia);

  if (ipa_is_ip4(ipa)) {
    ip4_addr ip4 = ip4_hton(ipa_to_ip4(ipa));
    buf.via.rtvia_family = AF_INET;
    memcpy(buf.raw + sizeof(struct rtvia), &ip4, sizeof(ip4));
    len += sizeof(ip4);
  } else {
    ip6_addr ip6 = ip6_hton(ipa_to_ip6(ipa));
    buf.via.rtvia_family = AF_INET6;
    memcpy(buf.raw + sizeof(struct rtvia), &ip6, sizeof(ip6));
    len += sizeof(ip6);
  }

  nl_add_attr(h, bufsize, RTA_VIA, &buf, len);
}

static inline struct rtnexthop *
@@ -411,7 +520,7 @@ nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
static inline void
nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
{
  nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
  nh->rtnh_len = (void *)h + h->nlmsg_len - (void *)nh;
}

static void
@@ -909,6 +1018,15 @@ nh_bufsize(struct mpnh *nh)
  return rv;
}

static inline mpls_stack
mpls_from_ea(struct adata *ad) {
  mpls_stack s = { .len = ad->length/sizeof(u32) };
  int i;
  for (i = 0; i < s.len; i++)
    s.label[i] = ((u32 *) ad->data)[i];
  return s;
}

static int
nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
{
@@ -933,6 +1051,9 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
  r.r.rtm_dst_len = net_pxlen(net->n.addr);
  r.r.rtm_protocol = RTPROT_BIRD;
  r.r.rtm_scope = RT_SCOPE_UNIVERSE;
  if (p->af == AF_MPLS)
    nl_add_attr_mpls(&r.h, sizeof(r), RTA_DST, net_mpls(net->n.addr));
  else
    nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net_prefix(net->n.addr));

  if (krt_table_id(p) < 256)
@@ -944,6 +1065,11 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
  if (!new)
    return nl_exchange(&r.h);

  if (ea = ea_find(eattrs, EA_GEN_MPLS_STACK))
    if (p->af == AF_MPLS)
      nl_add_attr_mpls(&r.h, sizeof(r), RTA_NEWDST, mpls_from_ea(ea->u.ptr));
    else
      nl_add_attr_mpls_encap(&r.h, sizeof(r), mpls_from_ea(ea->u.ptr));

  if (ea = ea_find(eattrs, EA_KRT_METRIC))
    nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data);
@@ -977,6 +1103,9 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
    case RTD_ROUTER:
      r.r.rtm_type = RTN_UNICAST;
      nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
      if (p->af == AF_MPLS)
	nl_add_attr_via(&r.h, sizeof(r), a->gw);
      else
	nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
      break;
    case RTD_DEVICE:
@@ -1068,6 +1197,16 @@ nl_parse_route(struct nlmsghdr *h, int scan)
	net_fill_ip6(&dst, IP6_NONE, 0);
      break;

    case AF_MPLS:
      if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
	return;

      if (a[RTA_DST])
	net_fill_mpls(&dst, rta_get_mpls(a[RTA_DST]).label[0]);
      else
	return; /* No support for MPLS routes without RTA_DST */
      break;

    default:
      return;
    }
@@ -1096,7 +1235,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
    SKIP("RTM_DELROUTE in scan\n");

  int c = net_classify(&dst);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
  if (i->rtm_family != AF_MPLS && ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK)))
    SKIP("strange class/scope\n");

  // ignore rtm_scope, it is not a real scope
@@ -1160,9 +1299,12 @@ nl_parse_route(struct nlmsghdr *h, int scan)
	  return;
	}

      if (a[RTA_GATEWAY])
      if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY] || (i->rtm_family == AF_MPLS) && a[RTA_VIA])
	{
	  ra.dest = RTD_ROUTER;
	  if (i->rtm_family == AF_MPLS)
	    ra.gw = rta_get_via(a[RTA_VIA]);
	  else
	    ra.gw = rta_get_ipa(a[RTA_GATEWAY]);

	  /* Silently skip strange 6to4 routes */
@@ -1210,6 +1352,24 @@ nl_parse_route(struct nlmsghdr *h, int scan)
  if (a[RTA_PRIORITY])
    e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]);

  if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST])
    {
      mpls_stack ms = rta_get_mpls(a[RTA_NEWDST]);

      ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
      ea->next = ra.eattrs;
      ra.eattrs = ea;
      ea->flags = EALF_SORTED;
      ea->count = 1;
      ea->attrs[0].id = EA_KRT_PREFSRC;
      ea->attrs[0].flags = 0;
      ea->attrs[0].type = EAF_TYPE_INT_SET;
      ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(u32)*ms.len);
      ea->attrs[0].u.ptr->length = sizeof(u32)*ms.len;
      for (int j = 0; j < ms.len; j++)
	((u32 *)ea->attrs[0].u.ptr->data)[j] = ms.label[j];
    }

  if (a[RTA_PREFSRC])
    {
      ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
@@ -1295,6 +1455,13 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
      nl_parse_route(h, 1);
    else
      log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);

  nl_request_dump(AF_MPLS, RTM_GETROUTE);
  while (h = nl_get_scan())
    if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
      nl_parse_route(h, 1);
    else
      log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
}

/*
+3 −2
Original line number Diff line number Diff line
@@ -1144,7 +1144,8 @@ krt_start(struct proto *P)
  {
  case NET_IP4:	p->af = AF_INET; break;
  case NET_IP6:	p->af = AF_INET6; break;
  default:	ASSERT(0);
  case NET_MPLS: p->af = AF_MPLS; break;
  default: log(L_ERR "KRT: Tried to start with strange net type: %d", p->p.net_type); return PS_START; break;
  }

  add_tail(&krt_proto_list, &p->krt_node);
@@ -1255,7 +1256,7 @@ struct protocol proto_unix_kernel = {
  .template =		"kernel%d",
  .attr_class =		EAP_KRT,
  .preference =		DEF_PREF_INHERITED,
  .channel_mask =	NB_IP,
  .channel_mask =	NB_IP | NB_MPLS,
  .proto_size =		sizeof(struct krt_proto),
  .config_size =	sizeof(struct krt_config),
  .preconfig =		krt_preconfig,