Commit 68197386 authored by Ondrej Zajicek (work)'s avatar Ondrej Zajicek (work)
Browse files

BGP: Long-lived graceful restart

The patch implements long-lived graceful restart for BGP, namely
draft-uttaro-idr-bgp-persistence-03.
parent 470efcb9
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -213,6 +213,7 @@ struct proto {
  int (*rte_better)(struct rte *, struct rte *);
  int (*rte_same)(struct rte *, struct rte *);
  int (*rte_mergable)(struct rte *, struct rte *);
  struct rte * (*rte_modify)(struct rte *, struct linpool *);
  void (*rte_insert)(struct network *, struct rte *);
  void (*rte_remove)(struct network *, struct rte *);

+3 −0
Original line number Diff line number Diff line
@@ -219,6 +219,7 @@ typedef struct rte {
#ifdef CONFIG_BGP
    struct {
      u8 suppressed;			/* Used for deterministic MED comparison */
      s8 stale;				/* Route is LLGR_STALE, -1 if unknown */
    } bgp;
#endif
#ifdef CONFIG_BABEL
@@ -241,6 +242,7 @@ typedef struct rte {
#define REF_FILTERED	2		/* Route is rejected by import filter */
#define REF_STALE	4		/* Route is stale in a refresh cycle */
#define REF_DISCARD	8		/* Route is scheduled for discard */
#define REF_MODIFY	16		/* Route is scheduled for modify */

/*
 * Tell whether a route is valid for propagation (may depend on other flags
 * in the future). A NULL route is accepted and reported as not valid;
 * otherwise the route is valid when its REF_FILTERED flag is not set.
 */
static inline int
rte_is_valid(rte *r)
{
  if (!r)
    return 0;

  return (r->flags & REF_FILTERED) == 0;
}
@@ -279,6 +281,7 @@ int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct fil
rte *rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, struct ea_list **tmpa, linpool *pool, int silent);
void rt_refresh_begin(rtable *t, struct announce_hook *ah);
void rt_refresh_end(rtable *t, struct announce_hook *ah);
void rt_modify_stale(rtable *t, struct announce_hook *ah);
void rte_dump(rte *);
void rte_free(rte *);
rte *rte_do_cow(rte *);
+62 −0
Original line number Diff line number Diff line
@@ -1295,6 +1295,28 @@ rte_discard(rte *old) /* Non-filtered route deletion, used during garbage collec
  rte_update_unlock();
}

/*
 * Modify existing route by protocol hook, used for long-lived graceful restart.
 *
 * The rte_modify hook of the protocol that sent @old is called with @old and
 * rte_update_pool, and its result decides what happens next:
 *
 *  - the very same route: @old stays in place, only its REF_MODIFY mark is
 *    cleared;
 *  - a different route: its attributes are cached if they are not yet, it
 *    takes over the flags of @old (REF_MODIFY dropped, REF_COW added) and is
 *    handed to rte_recalculate() together with the source of @old;
 *  - NULL: rte_recalculate() is called with no new route.
 *
 * The whole operation runs between rte_update_lock() and rte_update_unlock().
 */
static inline void
rte_modify(rte *old)
{
  rte_update_lock();

  rte *new = old->sender->proto->rte_modify(old, rte_update_pool);
  if (new == old)
  {
    /*
     * The hook kept the route as it is. The mark has to be removed here,
     * because the prune loop rescans the net after each rte_modify() call and
     * would keep picking the same still-marked route over and over.
     */
    old->flags &= ~REF_MODIFY;
  }
  else
  {
    if (new)
    {
      if (!rta_is_cached(new->attrs))
	new->attrs = rta_lookup(new->attrs);
      new->flags = (old->flags & ~REF_MODIFY) | REF_COW;
    }

    rte_recalculate(old->sender, old->net, new, old->attrs->src);
  }

  rte_update_unlock();
}

/* Check rtable for best route to given net whether it would be exported to p */
int
rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter)
@@ -1385,6 +1407,29 @@ rt_refresh_end(rtable *t, struct announce_hook *ah)
    rt_schedule_prune(t);
}

/*
 * Walk all networks of table @t and put the REF_MODIFY mark on every route
 * that was sent through announce hook @ah, has REF_STALE set and does not
 * have REF_FILTERED set. If at least one route got marked, a prune of the
 * table is scheduled by rt_schedule_prune().
 */
void
rt_modify_stale(rtable *t, struct announce_hook *ah)
{
  int marked = 0;

  FIB_WALK(&t->fib, fn)
    {
      net *nt = (net *) fn;
      rte *r = nt->routes;

      while (r)
	{
	  /* Stale and not filtered: of the two flags only REF_STALE is set */
	  if ((r->sender == ah) &&
	      ((r->flags & (REF_STALE | REF_FILTERED)) == REF_STALE))
	    {
	      r->flags |= REF_MODIFY;
	      marked = 1;
	    }

	  r = r->next;
	}
    }
  FIB_WALK_END;

  if (marked)
    rt_schedule_prune(t);
}


/**
 * rte_dump - dump a route
@@ -1604,6 +1649,7 @@ again:

    rescan:
      for (e=n->routes; e; e=e->next)
      {
	if (e->sender->proto->flushing || (e->flags & REF_DISCARD))
	  {
	    if (*limit <= 0)
@@ -1617,6 +1663,22 @@ again:

	    goto rescan;
	  }

	if (e->flags & REF_MODIFY)
	  {
	    if (*limit <= 0)
	      {
		FIB_ITERATE_PUT(fit, fn);
		return 0;
	      }

	    rte_modify(e);
	    (*limit)--;

	    goto rescan;
	  }
      }

      if (!n->routes)		/* Orphaned FIB entry */
	{
	  FIB_ITERATE_PUT(fit, fn);
+52 −1
Original line number Diff line number Diff line
@@ -1173,6 +1173,9 @@ bgp_community_filter(struct bgp_proto *p, rte *e)
	  DBG("\tNO_EXPORT\n");
	  return 1;
	}

      if (!p->conn->peer_llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
	return 1;
    }

  return 0;
@@ -1233,6 +1236,19 @@ rte_resolvable(rte *rt)
  return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH);
}

/*
 * Return the staleness value stored in r->u.bgp.stale. A negative stored
 * value means "unknown"; in that case the BGP community attribute of the
 * route is looked up, the result (1 when the attribute exists and contains
 * BGP_COMM_LLGR_STALE, 0 otherwise) is stored back into the route and
 * returned.
 */
static inline int
rte_stale(rte *r)
{
  /* Already known, use the cached answer */
  if (r->u.bgp.stale >= 0)
    return r->u.bgp.stale;

  eattr *comm = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));

  if (comm && int_set_contains(comm->u.ptr, BGP_COMM_LLGR_STALE))
    r->u.bgp.stale = 1;
  else
    r->u.bgp.stale = 0;

  return r->u.bgp.stale;
}

int
bgp_rte_better(rte *new, rte *old)
{
@@ -1257,6 +1273,14 @@ bgp_rte_better(rte *new, rte *old)
  if (n < o)
    return 0;

  /* LLGR draft - depreference stale routes */
  n = rte_stale(new);
  o = rte_stale(old);
  if (n > o)
    return 0;
  if (n < o)
    return 1;

  /* Start with local preferences */
  x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
  y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
@@ -1378,6 +1402,10 @@ bgp_rte_mergable(rte *pri, rte *sec)
  if (!rte_resolvable(sec))
    return 0;

  /* LLGR draft - depreference stale routes */
  if (rte_stale(pri) != rte_stale(sec))
    return 0;

  /* Start with local preferences */
  x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
  y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
@@ -1580,6 +1608,27 @@ bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
    return old_is_group_best;
}

/*
 * Route modify hook installed as the rte_modify callback of BGP.
 *
 * Looks at the BGP community attribute of route @r:
 *  - when it contains BGP_COMM_NO_LLGR, NULL is returned;
 *  - when it already contains BGP_COMM_LLGR_STALE, @r itself is returned;
 *  - otherwise the result of rte_cow_rta() on @r gets a community attribute
 *    extended by BGP_COMM_LLGR_STALE (built in @pool), has its cached
 *    staleness set to 1 and is returned.
 */
struct rte *
bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
{
  struct adata *comms = NULL;
  eattr *ea = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));

  if (ea)
    comms = ea->u.ptr;

  if (comms)
  {
    if (int_set_contains(comms, BGP_COMM_NO_LLGR))
      return NULL;

    if (int_set_contains(comms, BGP_COMM_LLGR_STALE))
      return r;
  }

  /* Not marked yet - attach the extended community set to the rte_cow_rta() result */
  struct rte *marked = rte_cow_rta(r, pool);

  bgp_attach_attr(&(marked->attrs->eattrs), pool, BA_COMMUNITY,
		  (uintptr_t) int_set_add(pool, comms, BGP_COMM_LLGR_STALE));
  marked->u.bgp.stale = 1;

  return marked;
}


static struct adata *
bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
{
@@ -1589,7 +1638,6 @@ bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
  return newa;
}


/* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
 * and append path old4 (in 4B format).
 */
@@ -1985,6 +2033,9 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
  if (e->u.bgp.suppressed)
    buf += bsprintf(buf, "-");

  if (rte_stale(e))
    buf += bsprintf(buf, "s");

  if (e->attrs->hostentry)
    {
      if (!rte_resolvable(e))
+64 −11
Original line number Diff line number Diff line
@@ -394,10 +394,17 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
  if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
    p->p.gr_wait = 1;

  if (p->gr_active)
  if (p->gr_active == BGP_GRS_ACTIVE)
    tm_stop(p->gr_timer);

  if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
  /* Check F-bit for regular graceful restart */
  if ((p->gr_active == BGP_GRS_ACTIVE) &&
      (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
    bgp_graceful_restart_done(p);

  /* Check F-bit for long-lived graceful restart */
  if (((p->gr_active == BGP_GRS_LLGR_1) || (p->gr_active == BGP_GRS_LLGR_2)) &&
      (!conn->peer_llgr_able || !(conn->peer_llgr_aflags & BGP_LLGRF_FORWARDING)))
    bgp_graceful_restart_done(p);

  /* GR capability implies that neighbor will send End-of-RIB */
@@ -474,11 +481,25 @@ bgp_handle_graceful_restart(struct bgp_proto *p)
	    p->gr_active ? " - already pending" : "");
  proto_notify_state(&p->p, PS_START);

  if (p->gr_active)
  switch (p->gr_active)
  {
  case BGP_GRS_ACTIVE:
    rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
    break;

  p->gr_active = 1;
  bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
  case BGP_GRS_LLGR_1:
    rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
    return;

  case BGP_GRS_LLGR_2:
    rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
    rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
    return;
  }

  p->stale_time = p->cf->llgr_mode ? p->conn->peer_llgr_time : 0;
  p->gr_active = !p->stale_time ? BGP_GRS_ACTIVE : BGP_GRS_LLGR_1;
  tm_start(p->gr_timer, p->conn->peer_gr_time);
  rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
}

@@ -515,10 +536,27 @@ bgp_graceful_restart_timeout(timer *t)
{
  struct bgp_proto *p = t->data;

  switch (p->gr_active)
  {
  case BGP_GRS_ACTIVE:
    BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
    bgp_stop(p, 0, NULL, 0);
}
    return;

  case BGP_GRS_LLGR_1:
    BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
    p->gr_active = BGP_GRS_LLGR_2;
    tm_start(p->gr_timer, p->stale_time);
    rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
    return;

  case BGP_GRS_LLGR_2:
    BGP_TRACE(D_EVENTS, "Long-lived graceful restart timeout");
    p->gr_active = 0;
    rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
    return;
  }
}

/**
 * bgp_refresh_begin - start incoming enhanced route refresh sequence
@@ -576,6 +614,10 @@ bgp_send_open(struct bgp_conn *conn)
  conn->peer_gr_time = 0;
  conn->peer_gr_flags = 0;
  conn->peer_gr_aflags = 0;
  conn->peer_llgr_aware = 0;
  conn->peer_llgr_able = 0;
  conn->peer_llgr_time = 0;
  conn->peer_llgr_aflags = 0;
  conn->peer_ext_messages_support = 0;

  DBG("BGP: Sending open\n");
@@ -1297,6 +1339,7 @@ bgp_init(struct proto_config *C)
  P->rte_better = bgp_rte_better;
  P->rte_mergable = bgp_rte_mergable;
  P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
  P->rte_modify = bgp_rte_modify_stale;

  p->cf = c;
  p->local_as = c->local_as;
@@ -1332,6 +1375,10 @@ bgp_check_config(struct bgp_config *c)
  if (!c->missing_lladdr)
    c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;

  /* LLGR mode default based on GR mode */
  if (c->llgr_mode < 0)
    c->llgr_mode = c->gr_mode ? BGP_LLGR_AWARE : 0;

  /* Disable after error incompatible with restart limit action */
  if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
    c->c.in_limit->action = PLA_DISABLE;
@@ -1382,6 +1429,9 @@ bgp_check_config(struct bgp_config *c)

  if (c->secondary && !c->c.table->sorted)
    cf_error("BGP with secondary option requires sorted table");

  if (!c->gr_mode && c->llgr_mode)
    cf_error("Long-lived graceful restart requires basic graceful restart");
}

static int
@@ -1550,6 +1600,11 @@ bgp_show_proto_info(struct proto *P)
  if (p->gr_active)
    cli_msg(-1006, "    Neighbor graceful restart active");

  if (p->gr_active && p->gr_timer->expires)
    cli_msg(-1006, "    %-15s   %d/-",
	    (p->gr_active != BGP_GRS_LLGR_2) ? "Restart timer:" : "LL stale timer:",
	    p->gr_timer->expires - now);

  if (P->proto_state == PS_START)
    {
      struct bgp_conn *oc = &p->outgoing_conn;
@@ -1563,9 +1618,6 @@ bgp_show_proto_info(struct proto *P)
	  (oc->connect_retry_timer->expires))
	cli_msg(-1006, "    Connect delay:    %d/%d",
		oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);

      if (p->gr_active && p->gr_timer->expires)
	cli_msg(-1006, "    Restart timer:    %d/-", p->gr_timer->expires - now);
    }
  else if (P->proto_state == PS_UP)
    {
@@ -1574,6 +1626,7 @@ bgp_show_proto_info(struct proto *P)
	      c->peer_refresh_support ? " refresh" : "",
	      c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
	      c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
	      c->peer_llgr_able ? " llgr-able" : (c->peer_llgr_aware ? " llgr-aware" : ""),
	      c->peer_as4_support ? " AS4" : "",
	      (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
	      (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
Loading