Commit e1c13a5a authored by Jan Moskyto Matejka's avatar Jan Moskyto Matejka
Browse files

Unix: Rework of select-loop to poll-loop

This should lift the limit of FD_SETSIZE and allow more than 1024 fd's.
FD_SETSIZE limit doesn't matter now when creating new sockets.
parent ce95af7a
Loading
Loading
Loading
Loading
+47 −54
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <poll.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
@@ -41,12 +42,12 @@
#include "lib/sysio.h"

/* Maximum number of calls of tx handler for one socket in one
 * select iteration. Should be small enough to not monopolize CPU by
 * poll iteration. Should be small enough to not monopolize CPU by
 * one protocol instance.
 */
#define MAX_STEPS 4

/* Maximum number of calls of rx handler for all sockets in one select
/* Maximum number of calls of rx handler for all sockets in one poll
   iteration. RX callbacks are often much more costly so we limit
   this to gen small latencies */
#define MAX_RX_STEPS 4
@@ -1022,7 +1023,6 @@ sk_log_error(sock *s, const char *p)
static list sock_list;
static struct birdsock *current_sock;
static struct birdsock *stored_sock;
static int sock_recalc_fdsets_p;

static inline sock *
sk_next(sock *s)
@@ -1078,7 +1078,6 @@ sk_free(resource *r)
    if (s == stored_sock)
      stored_sock = sk_next(s);
    rem_node(&s->n);
    sock_recalc_fdsets_p = 1;
  }
}

@@ -1276,7 +1275,6 @@ static void
sk_insert(sock *s)
{
  add_tail(&sock_list, &s->n);
  sock_recalc_fdsets_p = 1;
}

static void
@@ -1328,18 +1326,6 @@ sk_passive_connected(sock *s, int type)
      log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
  }

  if (fd >= FD_SETSIZE)
  {
    /* FIXME: Call err_hook instead ? */
    log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s",
	t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL,
	t->dport, "rejected due to FD_SETSIZE limit");
    close(fd);
    t->fd = -1;
    rfree(t);
    return 1;
  }

  if (sk_setup(t) < 0)
  {
    /* FIXME: Call err_hook instead ? */
@@ -1416,9 +1402,6 @@ sk_open(sock *s)
  if (fd < 0)
    ERR("socket");

  if (fd >= FD_SETSIZE)
    ERR2("FD_SETSIZE limit reached");

  s->af = af;
  s->fd = fd;

@@ -2062,15 +2045,15 @@ static int short_loops = 0;
void
io_loop(void)
{
  fd_set rd, wr;
  struct timeval timo;
  int poll_tout;
  time_t tout;
  int hi, events;
  int nfds, events;
  sock *s;
  node *n;
  int fdmax = 256;
  struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));

  watchdog_start1();
  sock_recalc_fdsets_p = 1;
  for(;;)
    {
      events = ev_run_list(&global_event_list);
@@ -2081,43 +2064,43 @@ io_loop(void)
	  tm_shot();
	  continue;
	}
      timo.tv_sec = events ? 0 : MIN(tout - now, 3);
      timo.tv_usec = 0;
      poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */

      io_close_event();

      if (sock_recalc_fdsets_p)
	{
	  sock_recalc_fdsets_p = 0;
	  FD_ZERO(&rd);
	  FD_ZERO(&wr);
	}

      hi = 0;
      nfds = 0;
      WALK_LIST(n, sock_list)
	{
	  pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
	  s = SKIP_BACK(sock, n, n);
	  if (s->rx_hook)
	    {
	      FD_SET(s->fd, &rd);
	      if (s->fd > hi)
		hi = s->fd;
	      pfd[nfds].fd = s->fd;
	      pfd[nfds].events |= POLLIN;
	    }
	  else
	    FD_CLR(s->fd, &rd);
	  if (s->tx_hook && s->ttx != s->tpos)
	    {
	      FD_SET(s->fd, &wr);
	      if (s->fd > hi)
		hi = s->fd;
	      pfd[nfds].fd = s->fd;
	      pfd[nfds].events |= POLLOUT;
	    }
	  if (pfd[nfds].fd != -1)
	    {
	      s->index = nfds;
	      nfds++;
	    }
	  else
	    FD_CLR(s->fd, &wr);
	    s->index = -1;

	  if (nfds >= fdmax)
	    {
	      fdmax *= 2;
	      pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
	    }
	}

      /*
       * Yes, this is racy. But even if the signal comes before this test
       * and entering select(), it gets caught on the next timer tick.
       * and entering poll(), it gets caught on the next timer tick.
       */

      if (async_config_flag)
@@ -2142,18 +2125,18 @@ io_loop(void)
	  continue;
	}

      /* And finally enter select() to find active sockets */
      /* And finally enter poll() to find active sockets */
      watchdog_stop();
      hi = select(hi+1, &rd, &wr, NULL, &timo);
      events = poll(pfd, nfds, poll_tout);
      watchdog_start();

      if (hi < 0)
      if (events < 0)
	{
	  if (errno == EINTR || errno == EAGAIN)
	    continue;
	  die("select: %m");
	  die("poll: %m");
	}
      if (hi)
      if (events)
	{
	  /* guaranteed to be non-empty */
	  current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
@@ -2161,11 +2144,17 @@ io_loop(void)
	  while (current_sock)
	    {
	      sock *s = current_sock;
	      if (s->index == -1)
		{
		  current_sock = sk_next(s);
		  goto next;
		}

	      int e;
	      int steps;

	      steps = MAX_STEPS;
	      if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
	      if ((s->type >= SK_MAGIC) && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
		do
		  {
		    steps--;
@@ -2177,7 +2166,7 @@ io_loop(void)
		while (e && s->rx_hook && steps);

	      steps = MAX_STEPS;
	      if (FD_ISSET(s->fd, &wr))
	      if (pfd[s->index].revents & POLLOUT)
		do
		  {
		    steps--;
@@ -2204,13 +2193,17 @@ io_loop(void)
	  while (current_sock && count < MAX_RX_STEPS)
	    {
	      sock *s = current_sock;
	      int e UNUSED;
	      if (s->index == -1)
		{
		  current_sock = sk_next(s);
		  goto next2;
		}

	      if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook)
	      if ((s->type < SK_MAGIC) && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
		{
		  count++;
		  io_log_event(s->rx_hook, s->data);
		  e = sk_read(s);
		  sk_read(s);
		  if (s != current_sock)
		      goto next2;
		}