Commit 7e934cf5 authored by Matthew Wilcox (Oracle)'s avatar Matthew Wilcox (Oracle)
Browse files

xarray: Fix early termination of xas_for_each_marked



xas_for_each_marked() is using entry == NULL as a termination condition
of the iteration. When xas_for_each_marked() is used protected only by
RCU, this can however race with xas_store(xas, NULL) in the following
way:

TASK1                                   TASK2
page_cache_delete()         	        find_get_pages_range_tag()
                                          xas_for_each_marked()
                                            xas_find_marked()
                                              off = xas_find_chunk()

  xas_store(&xas, NULL)
    xas_init_marks(&xas);
    ...
    rcu_assign_pointer(*slot, NULL);
                                              entry = xa_entry(off);

And thus xas_for_each_marked() terminates prematurely possibly leading
to missed entries in the iteration (translating to missing writeback of
some pages or a similar problem).

If we find a NULL entry that has been marked, skip it (unless we're trying
to allocate an entry).

Reported-by: default avatarJan Kara <jack@suse.cz>
CC: stable@vger.kernel.org
Fixes: ef8e5717 ("page cache: Convert delete_batch to XArray")
Signed-off-by: default avatarMatthew Wilcox (Oracle) <willy@infradead.org>
parent 34eee836
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -1648,6 +1648,7 @@ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max,
								xa_mark_t mark)
{
	struct xa_node *node = xas->xa_node;
	void *entry;
	unsigned int offset;

	if (unlikely(xas_not_node(node) || node->shift))
@@ -1659,7 +1660,10 @@ static inline void *xas_next_marked(struct xa_state *xas, unsigned long max,
		return NULL;
	if (offset == XA_CHUNK_SIZE)
		return xas_find_marked(xas, max, mark);
	return xa_entry(xas->xa, node, offset);
	entry = xa_entry(xas->xa, node, offset);
	if (!entry)
		return xas_find_marked(xas, max, mark);
	return entry;
}

/*
+2 −0
Original line number Diff line number Diff line
@@ -1208,6 +1208,8 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
		}

		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
		if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK))
			continue;
		if (!xa_is_node(entry))
			return entry;
		xas->xa_node = xa_to_node(entry);
+2 −2
Original line number Diff line number Diff line
@@ -7,8 +7,8 @@ LDLIBS+= -lpthread -lurcu
TARGETS = main idr-test multiorder xarray
CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
	 regression4.o \
	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
	 regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
	 iteration_check_2.o benchmark.o

ifndef SHIFT
	SHIFT=3
+87 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * iteration_check_2.c: Check that deleting a tagged entry doesn't cause
 * an RCU walker to finish early.
 * Copyright (c) 2020 Oracle
 * Author: Matthew Wilcox <willy@infradead.org>
 */
#include <pthread.h>
#include "test.h"

static volatile bool test_complete;

static void *iterator(void *arg)
{
	XA_STATE(xas, arg, 0);
	void *entry;

	rcu_register_thread();

	while (!test_complete) {
		xas_set(&xas, 0);
		rcu_read_lock();
		xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
			;
		rcu_read_unlock();
		assert(xas.xa_index >= 100);
	}

	rcu_unregister_thread();
	return NULL;
}

static void *throbber(void *arg)
{
	struct xarray *xa = arg;

	rcu_register_thread();

	while (!test_complete) {
		int i;

		for (i = 0; i < 100; i++) {
			xa_store(xa, i, xa_mk_value(i), GFP_KERNEL);
			xa_set_mark(xa, i, XA_MARK_0);
		}
		for (i = 0; i < 100; i++)
			xa_erase(xa, i);
	}

	rcu_unregister_thread();
	return NULL;
}

void iteration_test2(unsigned test_duration)
{
	pthread_t threads[2];
	DEFINE_XARRAY(array);
	int i;

	printv(1, "Running iteration test 2 for %d seconds\n", test_duration);

	test_complete = false;

	xa_store(&array, 100, xa_mk_value(100), GFP_KERNEL);
	xa_set_mark(&array, 100, XA_MARK_0);

	if (pthread_create(&threads[0], NULL, iterator, &array)) {
		perror("create iterator thread");
		exit(1);
	}
	if (pthread_create(&threads[1], NULL, throbber, &array)) {
		perror("create throbber thread");
		exit(1);
	}

	sleep(test_duration);
	test_complete = true;

	for (i = 0; i < 2; i++) {
		if (pthread_join(threads[i], NULL)) {
			perror("pthread_join");
			exit(1);
		}
	}

	xa_destroy(&array);
}
+1 −0
Original line number Diff line number Diff line
@@ -311,6 +311,7 @@ int main(int argc, char **argv)
	regression4_test();
	iteration_test(0, 10 + 90 * long_run);
	iteration_test(7, 10 + 90 * long_run);
	iteration_test2(10 + 90 * long_run);
	single_thread_tests(long_run);

	/* Free any remaining preallocated nodes */
Loading