Commit ad338d05 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull dcache_readdir() fixes from Al Viro:
 "The couple of patches you'd been OK with; no hlist conversion yet, and
  cursors are still in the list of children"

[ Al is referring to future work to avoid some nasty O(n**2) behavior
  with the readdir cursors when you have lots of concurrent readdirs.

  This is just a fix for a race with a trivial cleanup   - Linus ]

* 'work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  libfs: take cursors out of list when moving past the end of directory
  Fix the locking in dcache_readdir() and friends
parents 015c21ba 26b6c984
Loading
Loading
Loading
Loading
+71 −66
Original line number Diff line number Diff line
@@ -89,58 +89,45 @@ int dcache_dir_close(struct inode *inode, struct file *file)
EXPORT_SYMBOL(dcache_dir_close);

/* parent is locked at least shared */
static struct dentry *next_positive(struct dentry *parent,
				    struct list_head *from,
				    int count)
/*
 * Returns an element of siblings' list.
 * We are looking for <count>th positive after <p>; if
 * found, dentry is grabbed and returned to caller.
 * If no such element exists, NULL is returned.
 */
static struct dentry *scan_positives(struct dentry *cursor,
					struct list_head *p,
					loff_t count,
					struct dentry *last)
{
	unsigned *seq = &parent->d_inode->i_dir_seq, n;
	struct dentry *res;
	struct list_head *p;
	bool skipped;
	int i;
	struct dentry *dentry = cursor->d_parent, *found = NULL;

retry:
	i = count;
	skipped = false;
	n = smp_load_acquire(seq) & ~1;
	res = NULL;
	rcu_read_lock();
	for (p = from->next; p != &parent->d_subdirs; p = p->next) {
	spin_lock(&dentry->d_lock);
	while ((p = p->next) != &dentry->d_subdirs) {
		struct dentry *d = list_entry(p, struct dentry, d_child);
		if (!simple_positive(d)) {
			skipped = true;
		} else if (!--i) {
			res = d;
		// we must at least skip cursors, to avoid livelocks
		if (d->d_flags & DCACHE_DENTRY_CURSOR)
			continue;
		if (simple_positive(d) && !--count) {
			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
			if (simple_positive(d))
				found = dget_dlock(d);
			spin_unlock(&d->d_lock);
			if (likely(found))
				break;
			count = 1;
		}
		if (need_resched()) {
			list_move(&cursor->d_child, p);
			p = &cursor->d_child;
			spin_unlock(&dentry->d_lock);
			cond_resched();
			spin_lock(&dentry->d_lock);
		}
	rcu_read_unlock();
	if (skipped) {
		smp_rmb();
		if (unlikely(*seq != n))
			goto retry;
	}
	return res;
}

static void move_cursor(struct dentry *cursor, struct list_head *after)
{
	struct dentry *parent = cursor->d_parent;
	unsigned n, *seq = &parent->d_inode->i_dir_seq;
	spin_lock(&parent->d_lock);
	for (;;) {
		n = *seq;
		if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
			break;
		cpu_relax();
	}
	__list_del(cursor->d_child.prev, cursor->d_child.next);
	if (after)
		list_add(&cursor->d_child, after);
	else
		list_add_tail(&cursor->d_child, &parent->d_subdirs);
	smp_store_release(seq, n + 2);
	spin_unlock(&parent->d_lock);
	spin_unlock(&dentry->d_lock);
	dput(last);
	return found;
}

loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
@@ -158,18 +145,26 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
			return -EINVAL;
	}
	if (offset != file->f_pos) {
		file->f_pos = offset;
		if (file->f_pos >= 2) {
		struct dentry *cursor = file->private_data;
			struct dentry *to;
			loff_t n = file->f_pos - 2;
		struct dentry *to = NULL;

		inode_lock_shared(dentry->d_inode);
			to = next_positive(dentry, &dentry->d_subdirs, n);
			move_cursor(cursor, to ? &to->d_child : NULL);

		if (offset > 2)
			to = scan_positives(cursor, &dentry->d_subdirs,
					    offset - 2, NULL);
		spin_lock(&dentry->d_lock);
		if (to)
			list_move(&cursor->d_child, &to->d_child);
		else
			list_del_init(&cursor->d_child);
		spin_unlock(&dentry->d_lock);
		dput(to);

		file->f_pos = offset;

		inode_unlock_shared(dentry->d_inode);
	}
	}
	return offset;
}
EXPORT_SYMBOL(dcache_dir_lseek);
@@ -190,25 +185,35 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct dentry *cursor = file->private_data;
	struct list_head *p = &cursor->d_child;
	struct dentry *next;
	bool moved = false;
	struct list_head *anchor = &dentry->d_subdirs;
	struct dentry *next = NULL;
	struct list_head *p;

	if (!dir_emit_dots(file, ctx))
		return 0;

	if (ctx->pos == 2)
		p = &dentry->d_subdirs;
	while ((next = next_positive(dentry, p, 1)) != NULL) {
		p = anchor;
	else if (!list_empty(&cursor->d_child))
		p = &cursor->d_child;
	else
		return 0;

	while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
			      d_inode(next)->i_ino, dt_type(d_inode(next))))
			break;
		moved = true;
		p = &next->d_child;
		ctx->pos++;
		p = &next->d_child;
	}
	if (moved)
		move_cursor(cursor, p);
	spin_lock(&dentry->d_lock);
	if (next)
		list_move_tail(&cursor->d_child, &next->d_child);
	else
		list_del_init(&cursor->d_child);
	spin_unlock(&dentry->d_lock);
	dput(next);

	return 0;
}
EXPORT_SYMBOL(dcache_readdir);