Commit 14fc24f3 authored by Ondrej Zajicek (work)'s avatar Ondrej Zajicek (work)
Browse files

Trie: Implement longest-prefix-match queries and walks

The prefix trie now supports longest-prefix-match queries via the functions
trie_match_longest_ipX(). Such a query can be extended to an iteration over
all prefixes covering a given prefix (from longest to shortest) using the
TRIE_WALK_TO_ROOT_IPX() macros.
parent 062e69bf
Loading
Loading
Loading
Loading
+51 −0
Original line number Diff line number Diff line
@@ -196,11 +196,61 @@ void tree_walk(const struct f_tree *t, void (*hook)(const struct f_tree *, void
/* Prefix trie API */
struct f_trie *f_new_trie(linpool *lp, uint data_size);
void *trie_add_prefix(struct f_trie *t, const net_addr *n, uint l, uint h);
int trie_match_net(const struct f_trie *t, const net_addr *n);
/*
 * Longest-prefix-match queries, separate for IPv4 and IPv6 tries. They return
 * 1 and store the matching prefix to @dst when a match is found, 0 otherwise.
 * The @found0 bitmask of matched prefix lengths is optional (may be NULL).
 */
int trie_match_longest_ip4(const struct f_trie *t, const net_addr_ip4 *net, net_addr_ip4 *dst, ip4_addr *found0);
int trie_match_longest_ip6(const struct f_trie *t, const net_addr_ip6 *net, net_addr_ip6 *dst, ip6_addr *found0);
/* Iteration over prefixes in a trie that can be suspended and continued later */
void trie_walk_init(struct f_trie_walk_state *s, const struct f_trie *t, const net_addr *from);
int trie_walk_next(struct f_trie_walk_state *s, net_addr *net);
int trie_same(const struct f_trie *t1, const struct f_trie *t2);
void trie_format(const struct f_trie *t, buffer *buf);

/*
 * Step from the current match @n to the next shorter matching prefix.
 *
 * The prefix in @n is shortened bit by bit (clearing the dropped bit) until a
 * length marked in the bitmask @found is reached. Returns 1 when such a prefix
 * was found (it is left in @n), 0 when length zero was reached without one.
 */
static inline int
trie_match_next_longest_ip4(net_addr_ip4 *n, ip4_addr *found)
{
  for (;;)
  {
    /* Nothing shorter left to try */
    if (n->pxlen == 0)
      return 0;

    n->pxlen -= 1;
    ip4_clrbit(&n->prefix, n->pxlen);

    if (ip4_getbit(*found, n->pxlen))
      return 1;
  }
}

/*
 * Step from the current match @n to the next shorter matching prefix.
 *
 * The prefix in @n is shortened bit by bit (clearing the dropped bit) until a
 * length marked in the bitmask @found is reached. Returns 1 when such a prefix
 * was found (it is left in @n), 0 when length zero was reached without one.
 */
static inline int
trie_match_next_longest_ip6(net_addr_ip6 *n, ip6_addr *found)
{
  for (;;)
  {
    /* Nothing shorter left to try */
    if (n->pxlen == 0)
      return 0;

    n->pxlen -= 1;
    ip6_clrbit(&n->prefix, n->pxlen);

    if (ip6_getbit(*found, n->pxlen))
      return 1;
  }
}


/*
 * TRIE_WALK_TO_ROOT_IP4(trie, net, dst) { ... } TRIE_WALK_TO_ROOT_END
 *
 * Loop over prefixes from @trie matching the IPv4 network @net (a pointer to
 * net_addr_ip4), starting with the longest-prefix match returned by
 * trie_match_longest_ip4() and continuing through
 * trie_match_next_longest_ip4() to shorter ones. The macro declares a local
 * net_addr_ip4 variable named @dst that holds the current prefix inside the
 * loop body. The expansion opens a `({' statement expression, which has to be
 * closed by TRIE_WALK_TO_ROOT_END after the loop body.
 */
#define TRIE_WALK_TO_ROOT_IP4(trie, net, dst) ({		\
  net_addr_ip4 dst;						\
  ip4_addr _found;						\
  for (int _n = trie_match_longest_ip4(trie, net, &dst, &_found); \
       _n;							\
       _n = trie_match_next_longest_ip4(&dst, &_found))

/*
 * TRIE_WALK_TO_ROOT_IP6(trie, net, dst) { ... } TRIE_WALK_TO_ROOT_END
 *
 * IPv6 counterpart of TRIE_WALK_TO_ROOT_IP4(): @net is a pointer to
 * net_addr_ip6, @dst is declared as a local net_addr_ip6, and the iteration
 * uses trie_match_longest_ip6() and trie_match_next_longest_ip6().
 */
#define TRIE_WALK_TO_ROOT_IP6(trie, net, dst) ({		\
  net_addr_ip6 dst;						\
  ip6_addr _found;						\
  for (int _n = trie_match_longest_ip6(trie, net, &dst, &_found); \
       _n;							\
       _n = trie_match_next_longest_ip6(&dst, &_found))

/* Closes the statement expression opened by TRIE_WALK_TO_ROOT_IP4/IP6() */
#define TRIE_WALK_TO_ROOT_END })


#define TRIE_WALK(trie, net, from) ({				\
  net_addr net;							\
  struct f_trie_walk_state tws_;				\
@@ -209,6 +259,7 @@ void trie_format(const struct f_trie *t, buffer *buf);

#define TRIE_WALK_END })


#define F_CMP_ERROR 999

const char *f_type_name(enum f_type t);
+189 −1
Original line number Diff line number Diff line
@@ -85,7 +85,7 @@
 *
 * Iteration over prefixes in a trie can be done using TRIE_WALK() macro, or
 * directly using trie_walk_init() and trie_walk_next() functions. The second
 * approach allows suspending the iteration and continuing in it later.
 * Prefixes are enumerated in the usual lexicographic order and may be
 * restricted to a subset of the trie (all subnets of a specified prefix).
 *
@@ -100,6 +100,13 @@
 * path between the current node and its parent node, stored in the bitmap
 * &accept of the current node) and &local_pos for iteration over intra-node
 * prefixes (stored in the bitmap &local).
 *
 * The trie also supports longest-prefix-match queries by
 * trie_match_longest_ip4(). Such a query can be extended to an iteration over
 * all prefixes covering a given prefix (from longest to shortest) using the
 * TRIE_WALK_TO_ROOT_IP4() macro. There are also IPv6 versions (for practical
 * reasons, these functions and macros are separate for IPv4 and IPv6). The same
 * limitation regarding enumeration of `implicit' prefixes applies here as for
 * the TRIE_WALK() macro described above.
 */

#include "nest/bird.h"
@@ -541,6 +548,187 @@ trie_match_net(const struct f_trie *t, const net_addr *n)
}


/**
 * trie_match_longest_ip4
 * @t: trie
 * @net: net address
 * @dst: return value
 * @found0: optional returned bitmask of found nodes
 *
 * Look up the longest prefix stored in the trie @t that matches the address
 * @net and store it to the buffer @dst. During the descent from the root, the
 * lengths of shorter matching prefixes are recorded as bits of a bitmask
 * (e.g., a matching /20 prefix sets the 20-th bit); if @found0 is not NULL, the
 * bitmask is stored there. The bitmask allows enumerating all matching
 * prefixes for the network @net using trie_match_next_longest_ip4() or the
 * TRIE_WALK_TO_ROOT_IP4() macro.
 *
 * This function assumes IPv4 trie, there is also an IPv6 variant.
 *
 * Result: 1 if a matching prefix was found, 0 if not (in that case neither
 * @dst nor @found0 is written).
 */
int
trie_match_longest_ip4(const struct f_trie *t, const net_addr_ip4 *net, net_addr_ip4 *dst, ip4_addr *found0)
{
  ASSERT(t->ipv4);

  ip4_addr seen = IP4_NONE;	/* Bitmask of matched prefix lengths */
  int best = -1;		/* Longest matched length, -1 if none yet */
  int depth = 0;		/* Prefix length currently examined */

  /* Descend from the root, the child is chosen by the next bits of @net */
  for (const struct f_trie_node4 *node = &t->root.v4; node;
       node = node->c[ip4_getbits(net->prefix, node->plen, TRIE_STEP)])
  {
    /* The node is not on the path to @net */
    if (!ip4_prefix_equal(net->prefix, node->addr, MIN(net->pxlen, node->plen)))
      goto finish;

    /* Prefixes on the path from the parent node (accept mask) */
    while (depth < node->plen)
    {
      if (depth > net->pxlen)
	goto finish;

      if (ip4_getbit(node->accept, depth - 1))
      {
	/* depth < node->plen, therefore depth < 32 */
	ip4_setbit(&seen, depth);
	best = depth;
      }

      depth++;
    }

    /* At the max length only one local position is valid */
    if (depth == IP4_MAX_PREFIX_LENGTH)
    {
      if (node->local & (1u << 1))
	best = depth;

      goto finish;
    }

    /* Prefixes inside the node (local mask), positions follow bits of @net */
    int pos = 1;
    while (pos < (1 << TRIE_STEP))
    {
      if (depth > net->pxlen)
	goto finish;

      if (node->local & (1u << pos))
      {
	/* depth < 32 thanks to the max length case handled above */
	ip4_setbit(&seen, depth);
	best = depth;
      }

      pos = 2 * pos + ip4_getbit(net->prefix, depth);
      depth++;
    }
  }

finish:
  /* No prefix matched at all */
  if (best < 0)
    return 0;

  /* Result is @net truncated to the longest matched length */
  net_copy_ip4(dst, net);
  dst->prefix = ip4_and(dst->prefix, ip4_mkmask(best));
  dst->pxlen = best;

  if (found0)
    *found0 = seen;

  return 1;
}


/**
 * trie_match_longest_ip6
 * @t: trie
 * @net: net address
 * @dst: return value
 * @found0: optional returned bitmask of found nodes
 *
 * Look up the longest prefix stored in the trie @t that matches the address
 * @net and store it to the buffer @dst. During the descent from the root, the
 * lengths of shorter matching prefixes are recorded as bits of a bitmask
 * (e.g., a matching /20 prefix sets the 20-th bit); if @found0 is not NULL, the
 * bitmask is stored there. The bitmask allows enumerating all matching
 * prefixes for the network @net using trie_match_next_longest_ip6() or the
 * TRIE_WALK_TO_ROOT_IP6() macro.
 *
 * This function assumes IPv6 trie, there is also an IPv4 variant.
 *
 * Result: 1 if a matching prefix was found, 0 if not (in that case neither
 * @dst nor @found0 is written).
 */
int
trie_match_longest_ip6(const struct f_trie *t, const net_addr_ip6 *net, net_addr_ip6 *dst, ip6_addr *found0)
{
  ASSERT(!t->ipv4);

  ip6_addr seen = IP6_NONE;	/* Bitmask of matched prefix lengths */
  int best = -1;		/* Longest matched length, -1 if none yet */
  int depth = 0;		/* Prefix length currently examined */

  /* Descend from the root, the child is chosen by the next bits of @net */
  for (const struct f_trie_node6 *node = &t->root.v6; node;
       node = node->c[ip6_getbits(net->prefix, node->plen, TRIE_STEP)])
  {
    /* The node is not on the path to @net */
    if (!ip6_prefix_equal(net->prefix, node->addr, MIN(net->pxlen, node->plen)))
      goto finish;

    /* Prefixes on the path from the parent node (accept mask) */
    while (depth < node->plen)
    {
      if (depth > net->pxlen)
	goto finish;

      if (ip6_getbit(node->accept, depth - 1))
      {
	/* depth < node->plen, therefore depth < 128 */
	ip6_setbit(&seen, depth);
	best = depth;
      }

      depth++;
    }

    /* At the max length only one local position is valid */
    if (depth == IP6_MAX_PREFIX_LENGTH)
    {
      if (node->local & (1u << 1))
	best = depth;

      goto finish;
    }

    /* Prefixes inside the node (local mask), positions follow bits of @net */
    int pos = 1;
    while (pos < (1 << TRIE_STEP))
    {
      if (depth > net->pxlen)
	goto finish;

      if (node->local & (1u << pos))
      {
	/* depth < 128 thanks to the max length case handled above */
	ip6_setbit(&seen, depth);
	best = depth;
      }

      pos = 2 * pos + ip6_getbit(net->prefix, depth);
      depth++;
    }
  }

finish:
  /* No prefix matched at all */
  if (best < 0)
    return 0;

  /* Result is @net truncated to the longest matched length */
  net_copy_ip6(dst, net);
  dst->prefix = ip6_and(dst->prefix, ip6_mkmask(best));
  dst->pxlen = best;

  if (found0)
    *found0 = seen;

  return 1;
}

/*
 * Address-family dispatch helpers; @X selects the IPv4 (non-zero) or the IPv6
 * (zero) variant.
 *
 * SAME_PREFIX(A,B,X,L): compare the address of trie node @A with the prefix of
 * net @B on the first @L bits, using ip4_prefix_equal() or ip6_prefix_equal().
 *
 * GET_NET_BITS(N,X,A,B): call ip4_getbits() or ip6_getbits() on the prefix of
 * net @N with arguments @A and @B.
 */
#define SAME_PREFIX(A,B,X,L) ((X) ? ip4_prefix_equal((A)->v4.addr, net4_prefix(B), (L)) : ip6_prefix_equal((A)->v6.addr, net6_prefix(B), (L)))
#define GET_NET_BITS(N,X,A,B) ((X) ? ip4_getbits(net4_prefix(N), (A), (B)) : ip6_getbits(net6_prefix(N), (A), (B)))

+115 −0
Original line number Diff line number Diff line
@@ -774,6 +774,120 @@ t_trie_walk(void)
  return 1;
}

/*
 * Reference implementation of the walk to root: collect all networks from the
 * sorted array @prefixes (of @num entries) that cover @net, including @net
 * itself when present.
 *
 * The lookup starts with @net and repeatedly shortens it by one bit (clearing
 * the dropped bit) down to length zero, searching each candidate by bsearch()
 * with compare_prefixes(). Matches are stored to @found from the longest to
 * the shortest one; the number of stored networks is returned.
 */
static int
find_covering_nets(struct f_prefix *prefixes, int num, const net_addr *net, net_addr *found)
{
  struct f_prefix probe;
  net_addr *cur = &probe.net;
  int count = 0;

  net_copy(cur, net);

  for (;;)
  {
    struct f_prefix *hit =
      bsearch(&probe, prefixes, num, sizeof(struct f_prefix), compare_prefixes);

    if (hit)
    {
      net_copy(&found[count], cur);
      count++;
    }

    /* Zero-length prefix was the last candidate */
    if (!cur->pxlen)
      break;

    /* Shorten the candidate by one bit */
    cur->pxlen--;

    if (cur->type == NET_IP4)
      ip4_clrbit(&((net_addr_ip4 *) cur)->prefix, cur->pxlen);
    else
      ip6_clrbit(&((net_addr_ip6 *) cur)->prefix, cur->pxlen);
  }

  return count;
}

/*
 * Test TRIE_WALK_TO_ROOT_IP4() and TRIE_WALK_TO_ROOT_IP6() on random tries.
 *
 * Rounds are split into four levels of TESTS_NUM rounds each, alternating
 * between IPv4 and IPv6 and using PREFIXES_NUM * 32 prefixes in the first two
 * levels and PREFIXES_NUM * 512 in the last two. In every round a random
 * prefix list is generated, a trie is built from it, and a sorted array copy
 * of the prefixes is kept as a reference. For random networks, the prefixes
 * enumerated by the trie walk must be the same, and in the same order, as the
 * covering networks found in the reference array by find_covering_nets().
 *
 * Returns 1; failures are reported through bt_assert().
 */
static int
t_trie_walk_to_root(void)
{
  bt_bird_init();
  bt_config_parse(BT_CONFIG_SIMPLE);

  linpool *lp = lp_new_default(&root_pool);
  for (int round = 0; round < TESTS_NUM * 4; round++)
  {
    int level = round / TESTS_NUM;
    int v6 = level % 2;
    int num = PREFIXES_NUM  * (int[]){32, 512}[level / 2];
    int pos = 0;
    int st = 0, sn = 0, sm = 0;

    list *prefixes = make_random_prefix_list(lp, num, v6, 1);
    struct f_trie *trie = make_trie_from_prefix_list(lp, prefixes);

    /* Reference array, one extra zeroed entry is kept behind the last prefix */
    struct f_prefix *pxset = malloc((num + 1) * sizeof(struct f_prefix));

    struct f_prefix_node *pxn;
    WALK_LIST(pxn, *prefixes)
      pxset[pos++] = pxn->prefix;
    memset(&pxset[pos], 0, sizeof (struct f_prefix));

    /* Sorted order is required by bsearch() in find_covering_nets() */
    qsort(pxset, num, sizeof(struct f_prefix), compare_prefixes);

    int i;
    for (i = 0; i < (PREFIX_TESTS_NUM / 10); i++)
    {
      net_addr from;
      get_random_net(&from, v6);

      /* Room for all prefix lengths 0..128 */
      net_addr found[129];
      int found_num = find_covering_nets(pxset, num, &from, found);
      int n = 0;

      if (bt_verbose >= BT_VERBOSE_ABSOLUTELY_ALL)
      {
	char buf[64];
	bt_format_net(buf, 64, &from);
	bt_debug("Lookup for %s (expect %d)\n", buf, found_num);
      }

      /* Walk to root, separate for IPv4 and IPv6 */
      if (!v6)
      {
	TRIE_WALK_TO_ROOT_IP4(trie, (net_addr_ip4 *) &from, net)
	{
	  log_networks((net_addr *) &net, &found[n]);
	  bt_assert((n < found_num) && net_equal((net_addr *) &net, &found[n]));
	  n++;
	}
	TRIE_WALK_TO_ROOT_END;
      }
      else
      {
	TRIE_WALK_TO_ROOT_IP6(trie, (net_addr_ip6 *) &from, net)
	{
	  log_networks((net_addr *) &net, &found[n]);
	  bt_assert((n < found_num) && net_equal((net_addr *) &net, &found[n]));
	  n++;
	}
	TRIE_WALK_TO_ROOT_END;
      }

      /* The walk must not stop before all expected networks are reported */
      bt_assert(n == found_num);

      /* Stats */
      st += n;
      sn += !!n;
      sm = MAX(sm, n);
    }

    bt_debug("Success in %d / %d, sum %d, max %d\n", sn, i, st, sm);

    /* Release per-round data; pxset comes from malloc(), not from the linpool */
    free(pxset);
    lp_flush(lp);
  }

  bt_bird_cleanup();
  return 1;
}

int
main(int argc, char *argv[])
{
@@ -784,6 +898,7 @@ main(int argc, char *argv[])
  bt_test_suite(t_match_outer_net, "Testing random outer prefix matching");
  bt_test_suite(t_trie_same, "A trie filled forward should be same with a trie filled backward.");
  bt_test_suite(t_trie_walk, "Testing TRIE_WALK() on random tries");
  bt_test_suite(t_trie_walk_to_root, "Testing TRIE_WALK_TO_ROOT() on random tries");

  // bt_test_suite(t_bench_trie_datasets_subset, "Benchmark tries from datasets by random subset of nets");
  // bt_test_suite(t_bench_trie_datasets_random, "Benchmark tries from datasets by generated addresses");
+4 −1
Original line number Diff line number Diff line
@@ -510,7 +510,10 @@ bt_fmt_ipa(char *buf, size_t size, const void *data)
/*
 * Format the network address @data (a pointer to net_addr) into the buffer
 * @buf of @size bytes; a NULL pointer is formatted as "(null)".
 */
void
bt_format_net(char *buf, size_t size, const void *data)
{
  const net_addr *net = data;

  if (!net)
  {
    bsnprintf(buf, size, "(null)");
    return;
  }

  bsnprintf(buf, size, "%N", net);
}

int