Commit 878eeec1 authored by Maria Matejka's avatar Maria Matejka
Browse files

Routing tables now have their own loops.

This basically means that:
* there are some more levels of indirection and asynchronicity, mostly
  in cleanup procedures, requiring correct lock ordering
* all the internal table operations (prune, next hop update) are done
  without blocking the other parts of BIRD
* the protocols may get their own loops very soon
parent c7d0c5b2
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -1212,7 +1212,7 @@
  INST(FI_ROA_CHECK_IMPLICIT, 0, 1) {	/* ROA Check */
    NEVER_CONSTANT;
    RTC(1);
    struct rtable *table = rtc->table;
    rtable *table = rtc->table;
    ACCESS_RTE;
    ACCESS_EATTRS;
    const net_addr *net = fs->rte->net;
@@ -1244,7 +1244,7 @@
    ARG(1, T_NET);
    ARG(2, T_INT);
    RTC(3);
    struct rtable *table = rtc->table;
    rtable *table = rtc->table;

    u32 as = v2.val.i;

+47 −28
Original line number Diff line number Diff line
@@ -172,7 +172,7 @@ proto_cf_find_channel(struct proto_config *pc, uint net_type)
 * Returns pointer to channel or NULL
 */
struct channel *
proto_find_channel_by_table(struct proto *p, struct rtable *t)
proto_find_channel_by_table(struct proto *p, rtable *t)
{
  struct channel *c;

@@ -236,7 +236,9 @@ proto_add_channel(struct proto *p, struct channel_config *cf)
  c->channel = cf->channel;
  c->proto = p;
  c->table = cf->table->table;
  rt_lock_table(c->table);

  RT_LOCKED(c->table, t)
    rt_lock_table(t);

  c->in_filter = cf->in_filter;
  c->out_filter = cf->out_filter;
@@ -277,7 +279,9 @@ proto_remove_channel(struct proto *p UNUSED, struct channel *c)

  CD(c, "Removed", c->name);

  rt_unlock_table(c->table);
  RT_LOCKED(c->table, t)
    rt_unlock_table(t);

  rem_node(&c->n);
  mb_free(c);
}
@@ -391,7 +395,7 @@ static void
channel_roa_subscribe_filter(struct channel *c, int dir)
{
  const struct filter *f = dir ? c->in_filter : c->out_filter;
  struct rtable *tab;
  rtable *tab;
  int valid = 1, found = 0;

  if ((f == FILTER_ACCEPT) || (f == FILTER_REJECT))
@@ -560,11 +564,11 @@ channel_check_stopped(struct channel *c)
}

void
channel_import_stopped(struct rt_import_request *req)
channel_import_stopped(void *_c)
{
  struct channel *c = SKIP_BACK(struct channel, in_req, req);
  struct channel *c = _c;

  req->hook = NULL;
  c->in_req.hook = NULL;

  mb_free(c->in_req.name);
  c->in_req.name = NULL;
@@ -661,17 +665,16 @@ channel_aux_stopped(void *data)
  else
    c->in_table = NULL;

  rfree(cat->tab->rp);

  rfree(cat->tab->priv.rp);
  mb_free(cat);
  return channel_check_stopped(c);
  channel_check_stopped(c);
}

static void
channel_aux_import_stopped(struct rt_import_request *req)
channel_aux_import_stopped(void *_cat)
{
  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, push, req);
  ASSERT_DIE(cat->tab->delete_event);
  struct channel_aux_table *cat = _cat;
  cat->push.hook = NULL;
}

static void
@@ -680,24 +683,35 @@ channel_aux_export_stopped(struct rt_export_request *req)
  struct channel_aux_table *cat = SKIP_BACK(struct channel_aux_table, get, req);
  req->hook = NULL;

  if (cat->refeed_pending && !cat->tab->delete_event)
  {
  int del;
  RT_LOCKED(cat->tab, t)
    del = !!t->delete_event;

  if (del)
    return;

  ASSERT_DIE(cat->refeed_pending);
  cat->refeed_pending = 0;
  rt_request_export(cat->tab, req);
}
  else
    ASSERT_DIE(cat->tab->delete_event);
}

static void
channel_aux_stop(struct channel_aux_table *cat)
{
  rt_stop_import(&cat->push, channel_aux_import_stopped);
  rt_stop_export(&cat->get, channel_aux_export_stopped);
  RT_LOCKED(cat->tab, t)
  {
    t->delete_event = ev_new_init(t->rp, channel_aux_stopped, cat);
    t->delete_event->list = proto_event_list(cat->c->proto);
  }

  cat->tab->delete_event = ev_new_init(cat->tab->rp, channel_aux_stopped, cat);
  cat->push_stopped = (event) {
    .hook = channel_aux_import_stopped,
    .data = cat,
    .list = proto_event_list(cat->c->proto),
  };

  rt_unlock_table(cat->tab);
  rt_stop_import(&cat->push, &cat->push_stopped);
  rt_stop_export(&cat->get, channel_aux_export_stopped);
}

static void
@@ -889,7 +903,6 @@ channel_setup_in_table(struct channel *c, int best)

  c->in_table->c = c;
  c->in_table->tab = rt_setup(c->proto->pool, &cat->tab_cf);
  rt_lock_table(c->in_table->tab);

  rt_request_import(c->in_table->tab, &c->in_table->push);
  rt_request_export(c->in_table->tab, &c->in_table->get);
@@ -931,7 +944,6 @@ channel_setup_out_table(struct channel *c)

  c->out_table->c = c;
  c->out_table->tab = rt_setup(c->proto->pool, &cat->tab_cf);
  rt_lock_table(c->out_table->tab);

  rt_request_import(c->out_table->tab, &c->out_table->push);
  rt_request_export(c->out_table->tab, &c->out_table->get);
@@ -993,7 +1005,14 @@ channel_do_stop(struct channel *c)

  /* Stop import */
  if (c->in_req.hook)
    rt_stop_import(&c->in_req, channel_import_stopped);
  {
    c->in_stopped = (event) {
      .hook = channel_import_stopped,
      .data = c,
      .list = proto_event_list(c->proto),
    };
    rt_stop_import(&c->in_req, &c->in_stopped);
  }

  c->gr_wait = 0;
  if (c->gr_lock)
@@ -2339,7 +2358,7 @@ proto_do_start(struct proto *p)
{
  p->active = 1;

  rt_init_sources(&p->sources, p->name, proto_event_list(p));
  rt_init_sources(&p->sources, p->name, proto_work_list(p));
  if (!p->sources.class)
    p->sources.class = &default_rte_owner_class;

+5 −4
Original line number Diff line number Diff line
@@ -18,7 +18,6 @@

struct iface;
struct ifa;
struct rtable;
struct rte;
struct neighbor;
struct rta;
@@ -207,7 +206,7 @@ struct proto {
   *	   rte_remove	Called whenever a rte is removed from the routing table.
   */

  int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
  int (*rte_recalculate)(rtable *, struct network *, struct rte *, struct rte *, struct rte *);
  int (*rte_better)(struct rte *, struct rte *);
  int (*rte_mergable)(struct rte *, struct rte *);
  void (*rte_insert)(struct network *, struct rte *);
@@ -496,7 +495,7 @@ struct channel {
  const struct channel_class *channel;
  struct proto *proto;

  struct rtable *table;
  rtable *table;
  const struct filter *in_filter;	/* Input filter */
  const struct filter *out_filter;	/* Output filter */
  struct bmap export_map;		/* Keeps track which routes were really exported */
@@ -556,6 +555,7 @@ struct channel {
  btime last_state_change;		/* Time of last state transition */

  struct channel_aux_table *in_table;	/* Internal table for received routes */
  struct event in_stopped;		/* Import stop callback */

  u8 reload_pending;			/* Reloading and another reload is scheduled */
  u8 refeed_pending;			/* Refeeding and another refeed is scheduled */
@@ -570,6 +570,7 @@ struct channel_aux_table {
  struct channel *c;
  struct rt_import_request push;
  struct rt_export_request get;
  event push_stopped;
  rtable *tab;
  event *stop;
  u8 refeed_pending;
@@ -633,7 +634,7 @@ struct channel_config *proto_cf_find_channel(struct proto_config *p, uint net_ty
static inline struct channel_config *proto_cf_main_channel(struct proto_config *pc)
{ return proto_cf_find_channel(pc, pc->net_type); }

struct channel *proto_find_channel_by_table(struct proto *p, struct rtable *t);
struct channel *proto_find_channel_by_table(struct proto *p, rtable *t);
struct channel *proto_find_channel_by_name(struct proto *p, const char *n);
struct channel *proto_add_channel(struct proto *p, struct channel_config *cf);
int proto_configure_channel(struct proto *p, struct channel **c, struct channel_config *cf);
+67 −41
Original line number Diff line number Diff line
@@ -146,30 +146,21 @@ void fit_copy(struct fib *f, struct fib_iterator *dst, struct fib_iterator *src)
 *	It's guaranteed that there is at most one RTE for every (prefix,proto) pair.
 */

struct rtable_config {
  node n;
  char *name;
  struct config *config;
  struct rtable *table;
  struct proto_config *krt_attached;	/* Kernel syncer attached to this table */
  uint addr_type;			/* Type of address data stored in table (NET_*) */
  int gc_max_ops;			/* Maximum number of operations before GC is run */
  int gc_min_time;			/* Minimum time between two consecutive GC runs */
  byte sorted;				/* Routes of network are sorted according to rte_better() */
  btime min_settle_time;		/* Minimum settle time for notifications */
  btime max_settle_time;		/* Maximum settle time for notifications */
  btime export_settle_time;		/* Delay before exports are announced */
  uint cork_limit;			/* Amount of routes to be pending on export to cork imports */
};

typedef struct rtable {
  resource r;
  node n;				/* Node in list of all tables */
typedef struct rtable_private {
#define RTABLE_PUBLIC \
  resource r; \
  node n;				/* Node in list of all tables */ \
  struct birdloop *loop;		/* This loop runs the table */ \
  char *name;				/* Name of this table */ \
  uint addr_type;			/* Type of address data stored in table (NET_*) */ \
  struct rtable_config *config;		/* Configuration of this table */ \
  struct event *nhu_event;		/* Event to update next hops */ \
  _Atomic byte nhu_state;		/* Next Hop Update state */ \

  RTABLE_PUBLIC;
  pool *rp;				/* Resource pool to allocate everything from, including itself */
  struct slab *rte_slab;		/* Slab to allocate route objects */
  struct fib fib;
  char *name;				/* Name of this table */
  uint addr_type;			/* Type of address data stored in table (NET_*) */
  int use_count;			/* Number of protocols using this table */
  u32 rt_count;				/* Number of routes in the table */

@@ -178,18 +169,15 @@ typedef struct rtable {

  struct hmap id_map;
  struct hostcache *hostcache;
  struct rtable_config *config;		/* Configuration of this table */
  struct event *prune_event;		/* Event to prune abandoned routes */
  struct event *ec_event;		/* Event to prune finished exports */
  struct event *hcu_event;		/* Event to update host cache */
  struct event *nhu_event;		/* Event to update next hops */
  struct event *delete_event;		/* Event to delete the table */
  btime last_rt_change;			/* Last time when route changed */
  btime base_settle_time;		/* Start time of rtable settling interval */
  btime gc_time;			/* Time of last GC */
  int gc_counter;			/* Number of operations since last GC */
  byte prune_state;			/* Table prune state, 1 -> scheduled, 2-> running */
  byte nhu_state;			/* Next Hop Update state */

  byte cork_active;			/* Congestion control activated */

@@ -208,8 +196,35 @@ typedef struct rtable {

  struct rt_pending_export *first_export;	/* First export to announce */
  u64 next_export_seq;			/* The next export will have this ID */
} rtable_private;

/*
 * Public handle of a routing table.
 *
 * The anonymous struct exposes only the RTABLE_PUBLIC fields, while priv
 * overlays the complete rtable_private. As rtable_private itself begins
 * with RTABLE_PUBLIC, both views name the same leading members.
 */
typedef union {
  struct { RTABLE_PUBLIC };
  rtable_private priv;
} rtable;

/*
 * Access helpers for the private part of a routing table.
 *
 * RT_LOCK(tab)   calls birdloop_enter() on the table's loop and evaluates
 *                to a pointer to tab->priv.
 * RT_UNLOCK(tab) calls birdloop_leave() on the table's loop; it only reads
 *                the loop member of its argument.
 * RT_PRIV(tab)   evaluates to a pointer to tab->priv without entering the
 *                loop; ASSERT_DIE fires unless birdloop_inside() holds.
 *
 * RT_LOCK and RT_PRIV are written as statement expressions, a GNU C
 * extension.
 */
#define RT_LOCK(tab)	({ birdloop_enter((tab)->loop); &(tab)->priv; })
#define RT_UNLOCK(tab)	birdloop_leave((tab)->loop)
#define RT_PRIV(tab)	({ ASSERT_DIE(birdloop_inside((tab)->loop)); &(tab)->priv; })

/*
 * RT_LOCKED(tpub, tpriv) statement
 *
 * Executes the following statement exactly once between RT_LOCK(tpub) and
 * RT_UNLOCK(); tpriv is declared as a rtable_private pointer scoped to
 * that statement. The unlock lives in the iteration expression of the for
 * loop, so leaving the statement with break, return or goto skips it.
 */
#define RT_LOCKED(tpub, tpriv)	for (rtable_private *tpriv = RT_LOCK(tpub); tpriv; RT_UNLOCK(tpriv), (tpriv = NULL))

/*
 * Configuration of one routing table. The table itself points back here
 * through its config member.
 */
struct rtable_config {
  node n;				/* List node; NOTE(review): the owning list is not visible here */
  char *name;				/* Table name; presumably as given in the configuration, confirm */
  struct config *config;		/* Presumably the configuration this entry belongs to; confirm */
  rtable *table;			/* Routing table associated with this configuration */
  struct proto_config *krt_attached;	/* Kernel syncer attached to this table */
  uint addr_type;			/* Type of address data stored in table (NET_*) */
  int gc_max_ops;			/* Maximum number of operations before GC is run */
  int gc_min_time;			/* Minimum time between two consecutive GC runs */
  byte sorted;				/* Routes of network are sorted according to rte_better() */
  btime min_settle_time;		/* Minimum settle time for notifications */
  btime max_settle_time;		/* Maximum settle time for notifications */
  btime export_settle_time;		/* Delay before exports are announced */
  uint cork_limit;			/* Amount of routes to be pending on export to cork imports */
};

struct rt_subscription {
  node n;
  rtable *tab;
@@ -244,7 +259,7 @@ struct hostentry {
  ip_addr addr;				/* IP address of host, part of key */
  ip_addr link;				/* (link-local) IP address of host, used as gw
					   if host is directly attached */
  struct rtable *tab;			/* Dependent table, part of key */
  rtable *tab;				/* Dependent table, part of key */
  struct hostentry *next;		/* Next in hash chain */
  unsigned hash_key;			/* Hash key */
  unsigned uc;				/* Use count */
@@ -324,7 +339,7 @@ struct rt_import_hook {
  u8 stale_pruned;			/* Last prune finished when this value was set at stale_valid */
  u8 stale_pruning;			/* Last prune started when this value was set at stale_valid */

  void (*stopped)(struct rt_import_request *);	/* Stored callback when import is stopped */
  struct event *stopped;		/* Event to run when import is stopped */
};

struct rt_pending_export {
@@ -405,7 +420,7 @@ extern struct event_cork rt_cork;
void rt_request_import(rtable *tab, struct rt_import_request *req);
void rt_request_export(rtable *tab, struct rt_export_request *req);

void rt_stop_import(struct rt_import_request *, void (*stopped)(struct rt_import_request *));
void rt_stop_import(struct rt_import_request *, struct event *stopped);
void rt_stop_export(struct rt_export_request *, void (*stopped)(struct rt_export_request *));

const char *rt_import_state_name(u8 state);
@@ -480,27 +495,27 @@ struct config;
void rt_init(void);
void rt_preconfig(struct config *);
void rt_commit(struct config *new, struct config *old);
void rt_lock_table(rtable *);
void rt_unlock_table(rtable *);
void rt_lock_table(rtable_private *);
void rt_unlock_table(rtable_private *);
void rt_subscribe(rtable *tab, struct rt_subscription *s);
void rt_unsubscribe(struct rt_subscription *s);
rtable *rt_setup(pool *, struct rtable_config *);

static inline net *net_find(rtable *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
static inline net *net_find_valid(rtable *tab, const net_addr *addr)
static inline net *net_find(rtable_private *tab, const net_addr *addr) { return (net *) fib_find(&tab->fib, addr); }
static inline net *net_find_valid(rtable_private *tab, const net_addr *addr)
{ net *n = net_find(tab, addr); return (n && n->routes && rte_is_valid(&n->routes->rte)) ? n : NULL; }
static inline net *net_get(rtable *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
void *net_route(rtable *tab, const net_addr *n);
static inline net *net_get(rtable_private *tab, const net_addr *addr) { return (net *) fib_get(&tab->fib, addr); }
void *net_route(rtable_private *tab, const net_addr *n);
int net_roa_check(rtable *tab, const net_addr *n, u32 asn);
int rt_examine(rtable *t, net_addr *a, struct channel *c, const struct filter *filter);
int rt_examine(rtable_private *t, net_addr *a, struct channel *c, const struct filter *filter);
rte *rt_export_merged(struct channel *c, rte ** feed, uint count, linpool *pool, int silent);

void rt_refresh_begin(struct rt_import_request *);
void rt_refresh_end(struct rt_import_request *);
void rt_schedule_prune(rtable *t);
void rt_schedule_prune(rtable_private *t);
void rte_dump(struct rte_storage *);
void rte_free(struct rte_storage *, rtable *);
struct rte_storage *rte_store(const rte *, net *net, rtable *);
void rte_free(struct rte_storage *, rtable_private *);
struct rte_storage *rte_store(const rte *, net *net, rtable_private *);
void rt_dump(rtable *);
void rt_dump_all(void);
void rt_dump_hooks(rtable *);
@@ -591,7 +606,7 @@ struct rte_src {

typedef struct rta {
  struct rta *next, **pprev;		/* Hash chain */
  _Atomic u32 uc;			/* Use count */
  u32 uc;				/* Use count */
  u32 hash_key;				/* Hash over important fields */
  struct ea_list *eattrs;		/* Extended Attribute chain */
  struct hostentry *hostentry;		/* Hostentry for recursive next-hops */
@@ -732,7 +747,7 @@ struct rte_owner_class {

struct rte_owner {
  struct rte_owner_class *class;
  int (*rte_recalculate)(struct rtable *, struct network *, struct rte *, struct rte *, struct rte *);
  int (*rte_recalculate)(rtable_private *, struct network *, struct rte *, struct rte *, struct rte *);
  HASH(struct rte_src) hash;
  const char *name;
  u32 hash_key;
@@ -863,9 +878,20 @@ static inline size_t rta_size(const rta *a) { return sizeof(rta) + sizeof(u32)*a
#define RTA_MAX_SIZE (sizeof(rta) + sizeof(u32)*MPLS_MAX_LABEL_STACK)
rta *rta_lookup(rta *);			/* Get rta equivalent to this one, uc++ */
static inline int rta_is_cached(rta *r) { return r->cached; }
static inline rta *rta_clone(rta *r) { ASSERT_DIE(0 < atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel)); return r; }
/*
 * Take one more reference to @r.
 *
 * The use count is incremented between RTA_LOCK and RTA_UNLOCK and the very
 * same pointer is handed back. @r is dereferenced unconditionally, so it
 * must not be NULL.
 */
static inline rta *rta_clone(rta *r)
{
  RTA_LOCK;
  r->uc += 1;
  RTA_UNLOCK;

  return r;
}

void rta__free(rta *r);
static inline void rta_free(rta *r) { if (r && (1 == atomic_fetch_sub_explicit(&r->uc, 1, memory_order_acq_rel))) rta__free(r); }
/*
 * Drop one reference to @r; a NULL @r is accepted and ignored.
 *
 * The use count is decremented between RTA_LOCK and RTA_UNLOCK. When it
 * reaches zero, rta__free() is called before RTA_UNLOCK.
 */
static inline void rta_free(rta *r)
{
  RTA_LOCK;

  if (r)
  {
    r->uc--;
    if (r->uc == 0)
      rta__free(r);
  }

  RTA_UNLOCK;
}
rta *rta_do_cow(rta *o, linpool *lp);
static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta_do_cow(r, lp) : r; }
static inline void rta_uncache(rta *r) { r->cached = 0; r->uc = 0; }
+1 −10
Original line number Diff line number Diff line
@@ -1287,7 +1287,7 @@ rta_lookup(rta *o)
  for(r=rta_hash_table[h & rta_cache_mask]; r; r=r->next)
    if (r->hash_key == h && rta_same(r, o))
    {
      atomic_fetch_add_explicit(&r->uc, 1, memory_order_acq_rel);
      r->uc++;
      RTA_UNLOCK;
      return r;
    }
@@ -1308,14 +1308,6 @@ rta_lookup(rta *o)
void
rta__free(rta *a)
{
  RTA_LOCK;
  if (atomic_load_explicit(&a->uc, memory_order_acquire))
  {
    /* Somebody has cloned this rta inbetween. This sometimes happens. */
    RTA_UNLOCK;
    return;
  }

  ASSERT(rta_cache_count && a->cached);
  rta_cache_count--;
  *a->pprev = a->next;
@@ -1327,7 +1319,6 @@ rta__free(rta *a)
  ea_free(a->eattrs);
  a->cached = 0;
  sl_free(rta_slab(a), a);
  RTA_UNLOCK;
}

rta *
Loading