Commit 09ee846d authored by Ondrej Zajicek (work)'s avatar Ondrej Zajicek (work)
Browse files

BGP: AIGP metric support (RFC 7311)

parent 759b204b
Loading
Loading
Loading
Loading
+41 −3
Original line number Diff line number Diff line
@@ -2161,6 +2161,7 @@ avoid routing loops.
<item> <rfc id="6286"> - AS-Wide Unique BGP Identifier
<item> <rfc id="6608"> - Subcodes for BGP Finite State Machine Error
<item> <rfc id="6793"> - BGP Support for 4-Octet AS Numbers
<item> <rfc id="7311"> - Accumulated IGP Metric Attribute for BGP
<item> <rfc id="7313"> - Enhanced Route Refresh Capability for BGP
<item> <rfc id="7606"> - Revised Error Handling for BGP UPDATE Messages
<item> <rfc id="7911"> - Advertisement of Multiple Paths in BGP
@@ -2739,6 +2740,36 @@ be used in explicit configuration.
	TX direction. When active, all available routes accepted by the export
	filter are advertised to the neighbor. Default: off.

	<tag><label id="bgp-aigp">aigp <m/switch/|originate</tag>
	The BGP protocol does not use a common metric like other routing
	protocols; instead, it uses a set of criteria for route selection
	consisting of both the overall AS path length and the distance to the
	nearest AS boundary router. Assuming that metrics of different
	autonomous systems are incomparable, once a route is propagated from one
	AS to the next, the distance in the old AS does not matter.

	The AIGP extension (<rfc id="7311">) allows propagating the accumulated
	IGP metric (in the AIGP attribute) through both IBGP and EBGP links,
	computing the total distance through multiple autonomous systems
	(assuming they use comparable IGP metrics). The total AIGP metric is
	compared in the route selection process just after Local Preference
	comparison (and before AS path length comparison).

	This option controls whether AIGP attribute propagation is allowed on
	the session. Optionally, it can be set to <cf/originate/, which not only
	allows AIGP attribute propagation, but also automatically attaches new
	AIGP attributes to non-BGP routes with a valid IGP metric (e.g.
	<cf/ospf_metric1/) as they are exported to the BGP session. Default:
	enabled for IBGP (and intra-confederation EBGP), disabled for regular
	EBGP.

	<tag><label id="bgp-cost">cost <m/number/</tag>
	When BGP <ref id="bgp-gateway" name="gateway mode"> is <cf/recursive/
	(mainly multihop IBGP sessions), then the distance to BGP next hop is
	based on underlying IGP metric. This option specifies the distance to
	BGP next hop for BGP sessions in direct gateway mode (mainly direct
	EBGP sessions).

	<tag><label id="bgp-graceful-restart-c">graceful restart <m/switch/</tag>
	Although BGP graceful restart is configured mainly by protocol-wide
	<ref id="bgp-graceful-restart" name="options">, it is possible to
@@ -2807,9 +2838,11 @@ some of them (marked with `<tt/O/') are optional.
	presence of which indicates that the route has been aggregated from
	multiple routes by some router on the path from the originator.

<!-- we don't handle aggregators right since they are of a very obscure type
	<tag>bgp_aggregator</tag>
-->
	<tag><label id="rta-bgp-aggregator">void bgp_aggregator [O]</tag>
	This is an optional attribute specifying AS number and IP address of the
	BGP router that created the route by aggregating multiple BGP routes.
	Currently, the attribute is not accessible from filters.

	<tag><label id="rta-bgp-community">clist bgp_community [O]</tag>
	List of community values associated with the route. Each such value is a
	pair (represented as a <cf/pair/ data type inside the filters) of 16-bit
@@ -2844,6 +2877,11 @@ some of them (marked with `<tt/O/') are optional.
	<tag><label id="rta-bgp-cluster-list">clist bgp_cluster_list [I, O]</tag>
	This attribute contains a list of cluster IDs of route reflectors. Each
	route reflector prepends its cluster ID when reflecting the route.

	<tag><label id="rta-bgp-aigp">void bgp_aigp [O]</tag>
	This attribute contains the accumulated IGP metric, which is the total
	distance to the destination through multiple autonomous systems.
	Currently, the attribute is not accessible from filters.
</descrip>

<sect1>Example
+1 −0
Original line number Diff line number Diff line
@@ -652,6 +652,7 @@ void rta_dump(rta *);
void rta_dump_all(void);
void rta_show(struct cli *, rta *);

u32 rt_get_igp_metric(rte *rt);
struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep);
void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls);

+13 −1
Original line number Diff line number Diff line
@@ -44,6 +44,10 @@
#include "lib/string.h"
#include "lib/alloca.h"

#ifdef CONFIG_BGP
#include "proto/bgp/bgp.h"
#endif

pool *rt_table_pool;

static slab *rte_slab;
@@ -2934,7 +2938,7 @@ if_local_addr(ip_addr a, struct iface *i)
  return 0;
}

static u32
u32
rt_get_igp_metric(rte *rt)
{
  eattr *ea = ea_find(rt->attrs->eattrs, EA_GEN_IGP_METRIC);
@@ -2956,6 +2960,14 @@ rt_get_igp_metric(rte *rt)
    return rt->u.rip.metric;
#endif

#ifdef CONFIG_BGP
  if (a->source == RTS_BGP)
  {
    u64 metric = bgp_total_aigp_metric(rt);
    return (u32) MIN(metric, (u64) IGP_METRIC_UNKNOWN);
  }
#endif

  if (a->source == RTS_DEVICE)
    return 0;

+256 −9
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@
 *    are probably inadequate.
 *
 * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
 * 4271 does not explicitly specifiy the behavior in that case.
 * 4271 does not explicitly specify the behavior in that case.
 *
 * Loop detection related to route reflection (based on ORIGINATOR_ID
 * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
@@ -199,6 +199,179 @@ bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
}


/*
 *	AIGP handling
 */

/*
 * Check the TLV framing of a raw AIGP attribute body.
 *
 * @data, @len: attribute payload and its size in bytes
 * @err, @elen: optional buffer (may be NULL) that receives a description of
 *              the first problem found, including the offset where it occurred
 *
 * Every TLV has a 3-byte header (1-byte type, 2-byte length) and its length
 * field counts the header as well. A TLV of type BGP_AIGP_METRIC must be
 * exactly 11 bytes long. Returns 1 when the whole payload is well-formed,
 * 0 otherwise.
 */
static int
bgp_aigp_valid(byte *data, uint len, char *err, uint elen)
{
  byte *cur = data;
  uint left = len;
  const char *reason = NULL;
  uint culprit = 0;

  while (left)
  {
    /* Not even room for the TLV header */
    if (left < 3)
    {
      reason = "TLV framing error";
      culprit = left;
      break;
    }

    uint tlv_type = cur[0];
    uint tlv_len = get_u16(cur + 1);

    /* Declared length runs past the end of the attribute */
    if (left < tlv_len)
    {
      reason = "TLV framing error";
      culprit = tlv_len;
      break;
    }

    /* Length must at least cover the header itself */
    if (tlv_len < 3)
    {
      reason = "Bad TLV length";
      culprit = tlv_len;
      break;
    }

    /* The AIGP metric TLV has a fixed size */
    if ((tlv_type == BGP_AIGP_METRIC) && (tlv_len != 11))
    {
      reason = "Bad AIGP TLV length";
      culprit = tlv_len;
      break;
    }

    ADVANCE(cur, left, tlv_len);
  }

  if (!reason)
    return 1;

  /* Report the failure; fall back to an empty string if formatting fails */
  if (err && (bsnprintf(err, elen, "%s (%u) at %d", reason, culprit, (int) (cur - data)) < 0))
    err[0] = 0;

  return 0;
}

/*
 * Find the first TLV of the given @type inside AIGP attribute data @ad.
 *
 * Returns a pointer to the start of the TLV (its type byte), or NULL when
 * @ad is NULL or no such TLV is present. The walk performs no bounds
 * checking of its own; it steps by each TLV's declared length.
 * NOTE(review): presumably @ad has already passed bgp_aigp_valid() - confirm
 * for all callers.
 */
static const byte *
bgp_aigp_get_tlv(const struct adata *ad, uint type)
{
  if (!ad)
    return NULL;

  const byte *cur = ad->data;
  uint left = ad->length;

  while (left)
  {
    if (cur[0] == type)
      return cur;

    /* Not the one we want, skip over it using its declared length */
    uint step = get_u16(cur + 1);
    ADVANCE(cur, left, step);
  }

  return NULL;
}

/*
 * Build a copy of AIGP attribute data @ad in which the TLV of type @type
 * carries the value @data (@dlen bytes).
 *
 * The result is a new adata allocated from @pool; @ad (which may be NULL,
 * meaning an empty attribute) is only read. The new TLV is placed in front of
 * the first existing TLV whose type is greater than or equal to @type, or at
 * the end if there is none. The first existing TLV of the same type is
 * dropped; all other TLVs are copied unchanged in their original order.
 */
static const struct adata *
bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen)
{
  uint len = ad ? ad->length : 0;
  const byte *pos = ad ? ad->data : NULL;
  /* Upper bound on the result size: everything old plus the new TLV */
  struct adata *res = lp_alloc_adata(pool, len + 3 + dlen);
  byte *dst = res->data;
  byte *tlv = NULL;	/* Reserved slot for the new TLV, once chosen */
  int del = 0;		/* Set after the first matching TLV has been dropped */

  while (len)
  {
    uint ptype = pos[0];
    uint plen = get_u16(pos + 1);

    /* Find position for new TLV */
    if ((ptype >= type) && !tlv)
    {
      tlv = dst;
      dst += 3 + dlen;
    }

    /* Skip first matching TLV, copy others */
    if ((ptype == type) && !del)
      del = 1;
    else
    {
      memcpy(dst, pos, plen);
      dst += plen;
    }

    ADVANCE(pos, len, plen);
  }

  /* No existing TLV with type >= @type, so the new one goes last */
  if (!tlv)
  {
    tlv = dst;
    dst += 3 + dlen;
  }

  /* Store the TLV: 1-byte type, 2-byte length (header included), value */
  put_u8(tlv + 0, type);
  put_u16(tlv + 1, 3 + dlen);
  memcpy(tlv + 3, data, dlen);

  /* Update length - it is shorter than allocated if an old TLV was dropped */
  res->length = dst - res->data;

  return res;
}

/*
 * Read the 64-bit value stored in the BGP_AIGP_METRIC TLV of @ad.
 * Returns @def when no such TLV is found.
 */
static u64 UNUSED
bgp_aigp_get_metric(const struct adata *ad, u64 def)
{
  const byte *tlv = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC);

  if (!tlv)
    return def;

  /* The value follows the 3-byte TLV header */
  return get_u64(tlv + 3);
}

/*
 * Return a copy of AIGP attribute data @ad (allocated from @pool) whose
 * BGP_AIGP_METRIC TLV holds @metric. See bgp_aigp_set_tlv() for how the TLV
 * is inserted or replaced.
 */
static const struct adata *
bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
{
  byte raw[8];

  /* Serialize the metric into the 8-byte TLV value */
  put_u64(raw, metric);

  return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, raw, sizeof(raw));
}

/*
 * Compute the total AIGP metric of route @e: the value from its AIGP
 * attribute plus the local step taken from the route's igp_metric.
 *
 * Returns 0 (leaving @metric and @ad untouched) when the route has no AIGP
 * attribute or the attribute has no BGP_AIGP_METRIC TLV. Otherwise stores
 * the total in @metric, the attribute data in @ad, and returns 1.
 */
int
bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
{
  eattr *attr = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
  if (!attr)
    return 0;

  const byte *tlv = bgp_aigp_get_tlv(attr->u.ptr, BGP_AIGP_METRIC);
  if (!tlv)
    return 0;

  u64 base = get_u64(tlv + 3);
  u64 hop = e->attrs->igp_metric;

  /* Unresolvable route or unknown IGP metric: use the maximum step */
  if (!rte_resolvable(e) || (hop >= IGP_METRIC_UNKNOWN))
    hop = BGP_AIGP_MAX;

  /* A zero step is bumped to one */
  if (!hop)
    hop = 1;

  /* Saturate at BGP_AIGP_MAX if the unsigned addition wrapped around */
  u64 total = base + hop;
  if (total < base)
    total = BGP_AIGP_MAX;

  *ad = attr->u.ptr;
  *metric = total;

  return 1;
}

/*
 * Provide the initial AIGP metric for a route that does not come from BGP.
 *
 * For routes with source RTS_BGP returns 0 without touching the outputs.
 * Otherwise stores rt_get_igp_metric() of @e in @metric, sets @ad to NULL
 * (no previous attribute data) and returns nonzero iff the metric is below
 * IGP_METRIC_UNKNOWN.
 */
static inline int
bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
{
  if (e->attrs->source == RTS_BGP)
    return 0;

  u64 igp = rt_get_igp_metric(e);

  *metric = igp;
  *ad = NULL;

  return igp < IGP_METRIC_UNKNOWN;
}


/*
 *	Attribute hooks
 */
@@ -604,6 +777,42 @@ bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byt
  bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
}


/*
 * Export hook for the AIGP attribute: the attribute is kept as is when the
 * channel has AIGP enabled, otherwise UNSET() is applied to it.
 */
static void
bgp_export_aigp(struct bgp_export_state *s, eattr *a)
{
  if (s->channel->cf->aigp)
    return;

  UNSET(a);
}

/*
 * Decode hook for the AIGP attribute.
 *
 * Checks the Optional/Transitive flag bits against the attribute table entry
 * and the TLV framing via bgp_aigp_valid(); a failure in either is handled by
 * DISCARD(). A well-formed attribute is stored into @to with its raw payload.
 */
static void
bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
{
  /* Acceptability test postponed to bgp_finish_attrs() */

  uint flag_diff = flags ^ bgp_attr_table[BA_AIGP].flags;
  if (flag_diff & (BAF_OPTIONAL | BAF_TRANSITIVE))
    DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags);

  char reason[128];
  if (!bgp_aigp_valid(data, len, reason, sizeof(reason)))
    DISCARD("Malformed AIGP attribute - %s", reason);

  bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
}

/*
 * Format hook for the AIGP attribute: prints the value of the
 * BGP_AIGP_METRIC TLV into @buf, or "?" when the attribute has no such TLV.
 */
static void
bgp_format_aigp(eattr *a, byte *buf, uint size UNUSED)
{
  const byte *tlv = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);

  if (tlv)
    bsprintf(buf, "%lu", get_u64(tlv + 3));
  else
    bsprintf(buf, "?");
}


static void
bgp_export_large_community(struct bgp_export_state *s, eattr *a)
{
@@ -820,6 +1029,15 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
    .decode = bgp_decode_as4_aggregator,
    .format = bgp_format_aggregator,
  },
  [BA_AIGP] = {
    .name = "aigp",
    .type = EAF_TYPE_OPAQUE,
    .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
    .export = bgp_export_aigp,
    .encode = bgp_encode_raw,
    .decode = bgp_decode_aigp,
    .format = bgp_format_aigp,
  },
  [BA_LARGE_COMMUNITY] = {
    .name = "large_community",
    .type = EAF_TYPE_LC_SET,
@@ -1021,7 +1239,8 @@ bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, ui
    const struct bgp_attr_desc *desc = &bgp_attr_table[code];

    /* Handle conflicting flags; RFC 7606 3 (c) */
    if ((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
    if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
	!(desc->flags & BAF_DECODE_FLAGS))
      WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);

    desc->decode(s, code, flags, data, len, to);
@@ -1150,6 +1369,17 @@ withdraw:
  return NULL;
}

/*
 * Final pass over decoded attributes, run where the channel is known.
 * Currently it only removes an AIGP attribute that arrived on a channel
 * without AIGP enabled, reporting that it was discarded.
 */
void
bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
{
  /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
  if (!BIT32_TEST(s->attrs_seen, BA_AIGP) || s->channel->cf->aigp)
    return;

  REPORT("Discarding AIGP attribute received on non-AIGP session");
  bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
}


/*
 *	Route bucket hash table
@@ -1481,6 +1711,16 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
  if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
    bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);

  /* AIGP attribute - accumulate local metric or originate new one */
  u64 metric;
  if (s.local_next_hop &&
      (bgp_total_aigp_metric_(e, &metric, &ad) ||
       (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
  {
    ad = bgp_aigp_set_metric(pool, ad, metric);
    bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
  }

  /* IBGP route reflection, RFC 4456 */
  if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
  {
@@ -1578,12 +1818,6 @@ bgp_get_neighbor(rte *r)
  return p->cf->confederation ?: p->local_as;
}

/* A route is treated as resolvable iff its destination type is RTD_UNICAST */
static inline int
rte_resolvable(rte *rt)
{
  return (rt->attrs->dest == RTD_UNICAST) ? 1 : 0;
}

static inline int
rte_stale(rte *r)
{
@@ -1639,6 +1873,14 @@ bgp_rte_better(rte *new, rte *old)
  if (n < o)
    return 0;

  /* RFC 7311 4.1 - Apply AIGP metric */
  u64 n2 = bgp_total_aigp_metric(new);
  u64 o2 = bgp_total_aigp_metric(old);
  if (n2 < o2)
    return 1;
  if (n2 > o2)
    return 0;

  /* RFC 4271 9.1.2.2. a)  Use AS path lengths */
  if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
  {
@@ -2062,7 +2304,12 @@ bgp_get_route_info(rte *e, byte *buf)
  if (rte_stale(e))
    buf += bsprintf(buf, "s");

  if (e->attrs->hostentry)
  u64 metric = bgp_total_aigp_metric(e);
  if (metric < BGP_AIGP_MAX)
  {
    buf += bsprintf(buf, "/%lu", metric);
  }
  else if (e->attrs->igp_metric)
  {
    if (!rte_resolvable(e))
      buf += bsprintf(buf, "/-");
+11 −2
Original line number Diff line number Diff line
@@ -92,6 +92,7 @@
 * RFC 6286 - AS-Wide Unique BGP Identifier
 * RFC 6608 - Subcodes for BGP Finite State Machine Error
 * RFC 6793 - BGP Support for 4-Octet AS Numbers
 * RFC 7311 - Accumulated IGP Metric Attribute for BGP
 * RFC 7313 - Enhanced Route Refresh Capability for BGP
 * RFC 7606 - Revised Error Handling for BGP UPDATE Messages
 * RFC 7911 - Advertisement of Multiple Paths in BGP
@@ -1979,6 +1980,10 @@ bgp_postconfig(struct proto_config *CF)
    if (cc->llgr_time == ~0U)
      cc->llgr_time = cf->llgr_time;

    /* AIGP enabled by default on interior sessions */
    if (cc->aigp == 0xff)
      cc->aigp = interior;

    /* Default values of IGP tables */
    if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
    {
@@ -2087,13 +2092,17 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *impor
  if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP))
    return 0;

  if (new->gw_mode != old->gw_mode)
  if ((new->gw_mode != old->gw_mode) ||
      (new->aigp != old->aigp) ||
      (new->cost != old->cost))
    *import_changed = 1;

  if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
      (new->next_hop_self != old->next_hop_self) ||
      (new->next_hop_keep != old->next_hop_keep) ||
      (new->missing_lladdr != old->missing_lladdr))
      (new->missing_lladdr != old->missing_lladdr) ||
      (new->aigp != old->aigp) ||
      (new->aigp_originate != old->aigp_originate))
    *export_changed = 1;

  c->cf = new;
Loading