Commit 9aed29e6 authored by Ondrej Zajicek's avatar Ondrej Zajicek
Browse files

BGP: Enhanced route refresh (RFC 7313) support

Also hook feed_done is renamed to feed_end.
parent a5a5a41e
Loading
Loading
Loading
Loading
+11 −7
Original line number Diff line number Diff line
@@ -1804,13 +1804,17 @@ using the following configuration parameters:
	other means. Default: 0 (no local AS number allowed).

	<tag>enable route refresh <m/switch/</tag>
	When BGP speaker changes its import filter, it has to re-examine all
	routes received from its neighbor against the new filter. As these
	routes might not be available, there is a BGP protocol extension Route
	Refresh (specified in RFC 2918) that allows BGP speaker to request
	re-advertisement of all routes from its neighbor. This option specifies
	whether BIRD advertises this capability and accepts such requests. Even
	when disabled, BIRD can send route refresh requests. Default: on.
	After the initial route exchange, the BGP protocol uses incremental
	updates to keep BGP speakers synchronized. Sometimes (e.g., if a BGP
	speaker changes its import filter, or if there is a suspicion of
	inconsistency) it is necessary to do a new complete route exchange. The
	BGP protocol extension Route Refresh (RFC 2918) allows a BGP speaker to
	request re-advertisement of all routes from its neighbor. The BGP
	protocol extension Enhanced Route Refresh (RFC 7313) specifies explicit
	begin and end markers for such exchanges, therefore the receiver can
	remove stale routes that were not advertised during the exchange. This
	option specifies whether BIRD advertises these capabilities and supports
	the related procedures. Note that even when disabled, BIRD can send route
	refresh requests. Default: on.

	<tag>graceful restart <m/switch/|aware</tag>
	When a BGP speaker restarts or crashes, neighbors will discard all
+5 −2
Original line number Diff line number Diff line
@@ -942,8 +942,8 @@ proto_feed_more(void *P)
      p->export_state = ES_READY;
      proto_log_state_change(p);

      if (p->feed_done)
	p->feed_done(p);
      if (p->feed_end)
	p->feed_end(p);
    }
  else
    {
@@ -976,6 +976,9 @@ proto_schedule_feed(struct proto *p, int initial)

  p->attn->hook = initial ? proto_feed_initial : proto_feed_more;
  ev_schedule(p->attn);

  if (p->feed_begin)
    p->feed_begin(p, initial);
}

/*
+4 −2
Original line number Diff line number Diff line
@@ -179,7 +179,8 @@ struct proto {
   *	   reload_routes   Request protocol to reload all its routes to the core
   *			(using rte_update()). Returns: 0=reload cannot be done,
   *			1= reload is scheduled and will happen (asynchronously).
   *	   feed_done	Notify protocol about finish of route feeding.
   *	   feed_begin	Notify protocol about beginning of route feeding.
   *	   feed_end	Notify protocol about finish of route feeding.
   */

  void (*if_notify)(struct proto *, unsigned flags, struct iface *i);
@@ -190,7 +191,8 @@ struct proto {
  void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs);
  int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool);
  int (*reload_routes)(struct proto *);
  void (*feed_done)(struct proto *);
  void (*feed_begin)(struct proto *, int initial);
  void (*feed_end)(struct proto *);

  /*
   *	Routing entry hooks (called only for routes belonging to this protocol):
+97 −5
Original line number Diff line number Diff line
@@ -377,6 +377,8 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
  p->conn = conn;
  p->last_error_class = 0;
  p->last_error_code = 0;
  p->feed_state = BFS_NONE;
  p->load_state = BFS_NONE;
  bgp_init_bucket_table(p);
  bgp_init_prefix_table(p, 8);

@@ -394,6 +396,12 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
  if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
    bgp_graceful_restart_done(p);

  /* GR capability implies that neighbor will send End-of-RIB */
  if (conn->peer_gr_aware)
    p->load_state = BFS_LOADING;

  /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */

  bgp_conn_set_state(conn, BS_ESTABLISHED);
  proto_notify_state(&p->p, PS_UP);
}
@@ -504,6 +512,47 @@ bgp_graceful_restart_timeout(timer *t)
  bgp_stop(p, 0);
}


/**
 * bgp_refresh_begin - handle start of an incoming enhanced route refresh
 * @p: BGP instance
 *
 * Called when the neighbor opens an enhanced route refresh sequence with a
 * BoRR packet. Unless the instance is still waiting for End-of-RIB of the
 * initial load (in which case the BoRR is logged and ignored), the load state
 * is switched to %BFS_REFRESHING and a refresh cycle is started on the main
 * routing table of the protocol. Graceful restart uses the same table refresh
 * cycle; RFC 7313 together with the load states keeps the two sequences from
 * overlapping.
 */
void
bgp_refresh_begin(struct bgp_proto *p)
{
  /* Anything but a pending initial load may be followed by BoRR */
  if (p->load_state != BFS_LOADING)
    {
      p->load_state = BFS_REFRESHING;
      rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
      return;
    }

  /* BoRR arrived before End-of-RIB, leave the state untouched */
  log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name);
}

/**
 * bgp_refresh_end - handle end of an incoming enhanced route refresh
 * @p: BGP instance
 *
 * Called when the neighbor closes an enhanced route refresh sequence with an
 * EoRR packet. If no such sequence is in progress (load state is not
 * %BFS_REFRESHING), the EoRR is logged and ignored. Otherwise the load state
 * goes back to %BFS_NONE and the refresh cycle on the main routing table is
 * finished, so that the nest removes routes that were not received during the
 * sequence.
 */
void
bgp_refresh_end(struct bgp_proto *p)
{
  /* EoRR is meaningful only inside a sequence opened by BoRR */
  if (p->load_state == BFS_REFRESHING)
    {
      p->load_state = BFS_NONE;
      rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
      return;
    }

  /* Stray EoRR, leave the state untouched */
  log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name);
}


static void
bgp_send_open(struct bgp_conn *conn)
{
@@ -514,6 +563,7 @@ bgp_send_open(struct bgp_conn *conn)
  conn->peer_refresh_support = 0;
  conn->peer_as4_support = 0;
  conn->peer_add_path = 0;
  conn->peer_enhanced_refresh_support = 0;
  conn->peer_gr_aware = 0;
  conn->peer_gr_able = 0;
  conn->peer_gr_time = 0;
@@ -959,16 +1009,56 @@ bgp_reload_routes(struct proto *P)
}

/*
 * Feed-begin hook of the BGP protocol (installed as P->feed_begin).
 *
 * For an initial feed with p->cf->gr_mode set, the TX feed state becomes
 * BFS_LOADING. For a refeed where route refresh is enabled locally and the
 * peer announced enhanced route refresh support, a BoRR packet is scheduled
 * and the feed state becomes BFS_REFRESHING, unless an End-of-RIB is still
 * pending (feed state BFS_LOADING or BFS_LOADED). In all other cases the feed
 * state is left as it is.
 */
static void
bgp_feed_done(struct proto *P)
bgp_feed_begin(struct proto *P, int initial)
{
  struct bgp_proto *p = (struct bgp_proto *) P;
  if (!p->conn || !p->cf->gr_mode || p->p.refeeding)

  /* This should not happen */
  if (!p->conn)
    return;

  p->send_end_mark = 1;
  /* Initial feed that will be terminated by an End-of-RIB mark */
  if (initial && p->cf->gr_mode)
    p->feed_state = BFS_LOADING;

  /* It is refeed and both sides support enhanced route refresh */
  if (!initial && p->cf->enable_refresh &&
      p->conn->peer_enhanced_refresh_support)
    {
      /* BoRR must not be sent before End-of-RIB */
      if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
	return;

      /* Demarcated refresh: remember it and announce its beginning */
      p->feed_state = BFS_REFRESHING;
      bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
    }
}

/*
 * Feed-end hook of the BGP protocol.
 *
 * Moves the TX feed state from the "active" to the "done" variant
 * (BFS_LOADING -> BFS_LOADED, BFS_REFRESHING -> BFS_REFRESHED) and kicks the
 * TX hook by scheduling an update packet. Nothing is done when there is no
 * established connection or when the finished feed was not demarcated
 * (BFS_NONE).
 */
static void
bgp_feed_end(struct proto *P)
{
  struct bgp_proto *p = (struct bgp_proto *) P;

  /* This should not happen */
  if (!p->conn)
    return;

  switch (p->feed_state)
    {
    case BFS_NONE:
      /* Non-demarcated feed ended, nothing to do */
      return;

    case BFS_LOADING:
      /* Schedule End-of-RIB packet */
      p->feed_state = BFS_LOADED;
      break;

    case BFS_REFRESHING:
      /* Schedule EoRR packet */
      p->feed_state = BFS_REFRESHED;
      break;

    default:
      /* A marker is already scheduled, keep the state */
      break;
    }

  /* Kick TX hook */
  bgp_schedule_packet(p->conn, PKT_UPDATE);
}


static void
bgp_start_locked(struct object_lock *lock)
{
@@ -1150,7 +1240,8 @@ bgp_init(struct proto_config *C)
  P->import_control = bgp_import_control;
  P->neigh_notify = bgp_neigh_notify;
  P->reload_routes = bgp_reload_routes;
  P->feed_done = bgp_feed_done;
  P->feed_begin = bgp_feed_begin;
  P->feed_end = bgp_feed_end;
  P->rte_better = bgp_rte_better;
  P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;

@@ -1426,8 +1517,9 @@ bgp_show_proto_info(struct proto *P)
  else if (P->proto_state == PS_UP)
    {
      cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
      cli_msg(-1006, "    Neighbor caps:   %s%s%s%s%s",
      cli_msg(-1006, "    Neighbor caps:   %s%s%s%s%s%s",
	      c->peer_refresh_support ? " refresh" : "",
	      c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
	      c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
	      c->peer_as4_support ? " AS4" : "",
	      (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
+39 −7
Original line number Diff line number Diff line
@@ -103,6 +103,7 @@ struct bgp_conn {
  u8 peer_refresh_support;		/* Peer supports route refresh [RFC2918] */
  u8 peer_as4_support;			/* Peer supports 4B AS numbers [RFC4893] */
  u8 peer_add_path;			/* Peer supports ADD-PATH [draft] */
  u8 peer_enhanced_refresh_support;	/* Peer supports enhanced refresh [RFC7313] */
  u8 peer_gr_aware;
  u8 peer_gr_able;
  u16 peer_gr_time;
@@ -127,6 +128,8 @@ struct bgp_proto {
  int rs_client;			/* Whether neighbor is RS client of me */
  u8 gr_ready;				/* Neighbor could do graceful restart */
  u8 gr_active;				/* Neighbor is doing graceful restart */
  u8 feed_state;			/* Feed state (TX) for EoR, RR packets, see BFS_* */
  u8 load_state;			/* Load state (RX) for EoR, RR packets, see BFS_* */
  struct bgp_conn *conn;		/* Connection we have established */
  struct bgp_conn outgoing_conn;	/* Outgoing connection we're working with */
  struct bgp_conn incoming_conn;	/* Incoming connection we have neither accepted nor rejected yet */
@@ -144,7 +147,6 @@ struct bgp_proto {
  slab *prefix_slab;			/* Slab holding prefix nodes */
  list bucket_queue;			/* Queue of buckets to send */
  struct bgp_bucket *withdraw_bucket;	/* Withdrawn routes */
  unsigned send_end_mark;		/* End-of-RIB mark scheduled for transmit */
  unsigned startup_delay;		/* Time to delay protocol startup by due to errors */
  bird_clock_t last_proto_error;	/* Time of last error that leads to protocol stop */
  u8 last_error_class; 			/* Error class of last error */
@@ -196,6 +198,8 @@ void bgp_conn_enter_close_state(struct bgp_conn *conn);
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
void bgp_handle_graceful_restart(struct bgp_proto *p);
void bgp_graceful_restart_done(struct bgp_proto *p);
void bgp_refresh_begin(struct bgp_proto *p);
void bgp_refresh_end(struct bgp_proto *p);
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, unsigned subcode);

@@ -263,7 +267,8 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define PKT_UPDATE		0x02
#define PKT_NOTIFICATION	0x03
#define PKT_KEEPALIVE		0x04
#define PKT_ROUTE_REFRESH	0x05
#define PKT_ROUTE_REFRESH	0x05	/* [RFC2918] */
#define PKT_BEGIN_REFRESH	0x1e	/* Dummy type for BoRR packet [RFC7313] */
#define PKT_SCHEDULE_CLOSE	0x1f	/* Used internally to schedule socket close */

/* Attributes */
@@ -309,10 +314,10 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
 *
 * Used in PS_START for fine-grained specification of starting state.
 *
 * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP protocol
 * done what is neccessary to start itself (like acquiring the lock), it goes to BSS_CONNECT.
 * When some connection attempt failed because of option or capability error, it goes to
 * BSS_CONNECT_NOCAP.
 * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP
 * protocol has done what is necessary to start itself (like acquiring the
 * lock), it goes to BSS_CONNECT.  When some connection attempt failed because
 * of option or capability error, it goes to BSS_CONNECT_NOCAP.
 */

#define BSS_PREPARE		0	/* Used before ordinary BGP started, i. e. waiting for lock */
@@ -320,6 +325,33 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
#define BSS_CONNECT		2	/* Ordinary BGP connecting */
#define BSS_CONNECT_NOCAP	3	/* Legacy BGP connecting (without capabilities) */


/* BGP feed states (TX)
 *
 * RFC 4724 specifies that an initial feed should end with End-of-RIB mark.
 *
 * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets.
 *
 * These states (stored in p->feed_state) are used to keep track of these
 * requirements. When such a feed is started, BFS_LOADING / BFS_REFRESHING is
 * set. When it ends, BFS_LOADED / BFS_REFRESHED is set to schedule the
 * End-of-RIB or EoRR packet. When the packet is sent, the state returns to
 * BFS_NONE.
 *
 * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load
 * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR
 * demarcation) is active, BFS_NONE is set.
 *
 * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX)
 * with corresponding semantics (-, expecting End-of-RIB, expecting EoRR).
 */

#define BFS_NONE		0	/* No feed or original non-demarcated feed */
#define BFS_LOADING		1	/* Initial feed active, End-of-RIB planned (as load state: expecting End-of-RIB) */
#define BFS_LOADED		2	/* Loading done, End-of-RIB marker scheduled (feed state only) */
#define BFS_REFRESHING		3	/* Route refresh (introduced by BoRR) active (as load state: expecting EoRR) */
#define BFS_REFRESHED		4	/* Refresh done, EoRR packet scheduled (feed state only) */


/* Error classes */

#define BE_NONE			0
Loading