Commit 7c006269 authored by Maria Matejka
Browse files

Worker: Lesser load on spinlock when flushing readers

parent 6d5bb7bf
Loading
Loading
Loading
Loading
+20 −0
Original line number Diff line number Diff line
@@ -138,6 +138,26 @@ init_list(list *l)
  l->tail = &l->head_node;
}

/**
 * add_head_list - concatenate two lists
 * @to: destination list
 * @l: source list
 *
 * This function prepends all elements of the list @l to
 * the list @to in constant time.
 *
 * NOTE(review): after the call, @l still points at nodes that now
 * belong to @to; the caller must not touch @l again without
 * re-running init_list() on it — presumably the same contract as
 * add_tail_list(), confirm at the call sites.
 */
LIST_INLINE void
add_head_list(list *to, list *l)
{
  /* Join the lists: the old first node of @to now follows the last
   * node of @l. The statement ORDER here is load-bearing: when @l is
   * empty, the second store goes through @l's head sentinel and
   * (via the overlaid list-union layout) rewrites l->head to the old
   * to->head, so the header fixup below degenerates to a no-op and
   * the final store repairs the prev pointer clobbered by the first
   * store. This assumes the usual BIRD overlapping head/tail sentinel
   * union — TODO confirm against the list definition (not visible in
   * this chunk). */
  to->head->prev = l->tail;
  l->tail->next = to->head;

  /* Fix the header: @to now starts with @l's first node, and that
   * node's back-pointer is re-anchored at @to's own head sentinel. */
  to->head = l->head;
  to->head->prev = &to->head_node;
}

/**
 * add_tail_list - concatenate two lists
 * @to: destination list
+1 −0
Original line number Diff line number Diff line
@@ -78,6 +78,7 @@ void add_tail(list *, node *);
void add_head(list *, node *);
void rem_node(node *);
void move_list(list *dest, list *src);
void add_head_list(list *, list *);
void add_tail_list(list *, list *);
void init_list(list *);
void insert_node(node *, node *);
+58 −27
Original line number Diff line number Diff line
@@ -145,6 +145,22 @@ _Atomic u64 wql_cnt = 0;

static _Thread_local int worker_sleeping = 1;

/* Prefetch the work-queue spinlock together with any number of extra
 * cache lines before the caller actually takes WQ_LOCK(), shortening
 * the time subsequently spent holding the lock.
 *
 * CONTRACT: the variadic pointer list MUST be terminated by a NULL
 * sentinel. The loop below stops only when it reads a null pointer;
 * without the sentinel, va_arg() walks past the supplied arguments,
 * which is undefined behaviour.
 *
 * NOTE(review): the call sites added in domain_unlock_writers() and
 * domain_write_unlock() invoke WQ_LOCK_PREFETCH(wq->pending.head)
 * with NO trailing NULL, unlike the worker_loop()/task_push() calls
 * which do pass one. That looks like a missing sentinel — confirm
 * and fix those callers (or make this a sentinel-appending macro so
 * misuse is impossible).
 */
static inline void WQ_LOCK_PREFETCH(void *ptr, ...)
{
  /* wq is presumably the file-scope work queue; warm its lock first */
  __builtin_prefetch(&wq->lock);

  va_list args;
  va_start(args, ptr);

  /* Prefetch every supplied pointer up to (excluding) the NULL sentinel */
  while (ptr)
  {
    __builtin_prefetch(ptr);
    ptr = va_arg(args, void *);
  }

  va_end(args);
}

static inline void WQ_LOCK(void)
{
  WASSERT(!worker_sleeping);
@@ -205,9 +221,6 @@ static inline void WQ_UNLOCK(void)
#endif
}

#define WQ_LOCKED MACRO_PACK_BEFORE_AFTER(WQ_LOCK(), WQ_UNLOCK())
#define WQ_LOCKED_BOOL(...) (WQ_LOCK(), !!(__VA_ARGS__) + (WQ_UNLOCK(), 0))

static inline void SEM_INIT(sem_t *s, uint val)
{
  if (sem_init(s, 0, val) < 0)
@@ -378,16 +391,8 @@ retry: do { \
    bug("Lock state value shall never change while in slowpath"); \
} while (0)

#define TASK_PREPEND(d, t) do { \
  t->flags |= TF_PREPENDED; \
  WQ_LOCK(); \
  add_head(&wq->pending, &((t)->n)); \
  WQ_UNLOCK(); \
  SEM_POST(&wq->waiting); \
} while (0)

#define TASK_STOP_WORKER do { \
  atomic_fetch_add(&wq->stop, 1); \
  atomic_fetch_add_explicit(&wq->stop, 1, memory_order_acquire); \
  SEM_POST(&wq->waiting); \
} while (0);

@@ -701,6 +706,8 @@ domain_unlock_writers(struct domain *d, u64 lock, u64 ulock)

  if (d->wrtasks_n)
  {
    WQ_LOCK_PREFETCH(wq->pending.head);

    WDBG("-> There is a writer task waiting for us (n=%u)\n", d->wrtasks_n);
    if ((ulock & DOMAIN_LOCK_WRLOCKED_BIT) || DOMAIN_LOCK_PREPENDED(ulock))
      wimpossible();
@@ -718,7 +725,11 @@ domain_unlock_writers(struct domain *d, u64 lock, u64 ulock)
      ((d->rdtasks_n || d->rdsem_n) ? DOMAIN_LOCK_READERS_BIT : 0));

    /* Prepend the task */
    TASK_PREPEND(d, t);
    t->flags |= TF_PREPENDED;
    WQ_LOCK();
    add_head(&wq->pending, &((t)->n));
    WQ_UNLOCK();
    SEM_POST(&wq->waiting);

    return;
  }
@@ -818,6 +829,10 @@ domain_write_unlock(struct domain *d)
 * So we sometimes flush the readers instead of locking another pending writer. */
  if (r && (!w || (r > 3*w)))
  {
    list tmp_rdtasks;

    WQ_LOCK_PREFETCH(wq->pending.head);

    WDBG("-> Flushing readers: WP=%u WS=%u RP=%u RS=%u\n",
	d->wrtasks_n, d->wrsem_n, d->rdtasks_n, d->rdsem_n);

@@ -830,7 +845,7 @@ domain_write_unlock(struct domain *d)
    d->rdsem_n = 0;

    /* Move the tasks to a temporary list */
    list tmp_rdtasks;
    if (rdtasks_n)
      move_list(&tmp_rdtasks, &d->rdtasks);

    DOMAIN_LOCK_EXIT_SLOWPATH(
@@ -841,18 +856,28 @@ domain_write_unlock(struct domain *d)
	/* If writers are remaining, block other writers */
	((d->wrtasks_n || d->wrsem_n) ? DOMAIN_LOCK_WRITERS_BIT : 0));

    if (rdtasks_n)
    {
      /* Put the prepended tasks into queue */
      u64 prepend_check = 0;
    struct task *t, *tt;
    WALK_LIST_BACKWARDS_DELSAFE(t, tt, tmp_rdtasks)
      struct task *t;
      WALK_LIST(t, tmp_rdtasks)
      {
      rem_node(&t->n);
      TASK_PREPEND(d, t);
	t->flags |= TF_PREPENDED;
	prepend_check++;
      }
   
    if (prepend_check != rdtasks_n || !EMPTY_LIST(tmp_rdtasks))
      wbug("This shall never happen");
      /* Check the right number of waiting tasks */   
      if (prepend_check != rdtasks_n)
	wimpossible();

      WQ_LOCK();
      add_head_list(&wq->pending, &tmp_rdtasks);
      WQ_UNLOCK();

      for (uint i=0; i<prepend_check; i++)
	SEM_POST(&wq->waiting);
    }

    /* Unlock the waiting secondary readers */
    for ( ; rdsem_n; rdsem_n--)
@@ -889,6 +914,8 @@ worker_loop(void *_data UNUSED)
 
  /* Run the loop */
  while (1) {
    WQ_LOCK_PREFETCH(wq->pending.head, NULL);

    if (!SEM_TRYWAIT(&wq->waiting))
    {
      WDBG("Worker will wait\n");
@@ -914,6 +941,7 @@ worker_loop(void *_data UNUSED)
    }

    WQ_LOCK();

    /* Is there a pending task? */
    if (!EMPTY_LIST(wq->pending))
    {
@@ -1215,11 +1243,14 @@ task_push(struct task *t)
  WDBG("Task push\n");

  /* Stopping, won't accept tasks */
  if (atomic_load(&wq->max_workers) == 0)
  if (atomic_load_explicit(&wq->max_workers, memory_order_relaxed) == 0)
    return;

  /* Will add_tail to the pending tasks list */
  WQ_LOCK_PREFETCH(wq->pending.tail, NULL);

  /* Is there an available worker right now? */
  if ((atomic_load(&wq->blocked) == 0) && SEM_TRYWAIT(&wq->available))
  if ((atomic_load_explicit(&wq->blocked, memory_order_relaxed) == 0) && SEM_TRYWAIT(&wq->available))
    return task_push_available(t);
  else
    return task_push_block(t);
+10 −7
Original line number Diff line number Diff line
@@ -9,8 +9,10 @@
#include "lib/atomic.h"
#include "conf/conf.h"

//#define TEST_MAX (1 << 18)
#define TEST_MAX (1 << 12)
#define TEST_MAX (1 << 15)
//#define TEST_MAX (1 << 10)

#define FROB  31

struct t_rwlock_task {
  struct task task;
@@ -19,7 +21,7 @@ struct t_rwlock_task {
  _Atomic uint *total_counter;
  _Atomic uint *allocated;
  uint sink;
  uint frobnicator[42];
  uint frobnicator[FROB];
};

static void t_rwlock_execute(struct task *task)
@@ -36,12 +38,13 @@ static void t_rwlock_execute(struct task *task)
  uint tot = atomic_fetch_add(t->total_counter, 1);

  /* Spin for some time to mimic some reasonable work */
  for (uint i=0; i<42; i++)
  for (uint i=0; i<FROB; i++)
    t->frobnicator[i] = (i+1) * (2*i + 1) * 3535353559;

  for (uint i=0; i<42; i++)
    for (uint j=0; j<42; j++)
      t->sink += (t->frobnicator[i] ^= -t->frobnicator[j]) * 3535353559;
  for (uint i=0; i<FROB; i++)
    for (uint j=0; j<FROB; j++)
      for (uint k=0; k<FROB; k++)
	t->sink += (t->frobnicator[i] ^= t->frobnicator[k] - t->frobnicator[j]) * 3535353559;

  if (t->domain)
    switch (t->howtolock) {