Commit 227747fb authored by Linus Torvalds
Browse files
Pull misc AFS fixes from David Howells:
 "This fixes a set of miscellaneous issues in the afs filesystem,
  including:

   - leak of keys on file close.

   - broken error handling in xattr functions.

   - missing locking when updating VL server list.

   - volume location server DNS lookup whereby preloaded cells may not
     ever get a lookup and regular DNS lookups to maintain server lists
     consume power unnecessarily.

   - incorrect error propagation and handling in the fileserver
     iteration code causes operations to sometimes apparently succeed.

   - interruption of server record check/update side op during
     fileserver iteration causes uninterruptible main operations to fail
     unexpectedly.

   - callback promise expiry time miscalculation.

   - over invalidation of the callback promise on directories.

   - double locking on callback break waking up file locking waiters.

   - double increment of the vnode callback break counter.

  Note that it makes some changes outside of the afs code, including:

   - an extra parameter to dns_query() to allow the dns_resolver key
     just accessed to be immediately invalidated. AFS is caching the
     results itself, so the key can be discarded.

   - an interruptible version of wait_var_event().

   - an rxrpc function to allow the maximum lifespan to be set on a
     call.

   - a way for an rxrpc call to be marked as non-interruptible."

* tag 'afs-fixes-20190516' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  afs: Fix double inc of vnode->cb_break
  afs: Fix lock-wait/callback-break double locking
  afs: Don't invalidate callback if AFS_VNODE_DIR_VALID not set
  afs: Fix calculation of callback expiry time
  afs: Make dynamic root population wait uninterruptibly for proc_cells_lock
  afs: Make some RPC operations non-interruptible
  rxrpc: Allow the kernel to mark a call as being non-interruptible
  afs: Fix error propagation from server record check/update
  afs: Fix the maximum lifespan of VL and probe calls
  rxrpc: Provide kernel interface to set max lifespan on a call
  afs: Fix "kAFS: AFS vnode with undefined type 0"
  afs: Fix cell DNS lookup
  Add wait_var_event_interruptible()
  dns_resolver: Allow used keys to be invalidated
  afs: Fix afs_cell records to always have a VL server list record
  afs: Fix missing lock when replacing VL server list
  afs: Fix afs_xattr_get_yfs() to not try freeing an error value
  afs: Fix incorrect error handling in afs_xattr_get_acl()
  afs: Fix key leak in afs_release() and afs_evict_inode()
parents 1d9d7cbf fd711586
Loading
Loading
Loading
Loading
+20 −1
Original line number Diff line number Diff line
@@ -796,7 +796,9 @@ The kernel interface functions are as follows:
				s64 tx_total_len,
				gfp_t gfp,
				rxrpc_notify_rx_t notify_rx,
				bool upgrade);
				bool upgrade,
				bool intr,
				unsigned int debug_id);

     This allocates the infrastructure to make a new RxRPC call and assigns
     call and connection numbers.  The call will be made on the UDP port that
@@ -824,6 +826,13 @@ The kernel interface functions are as follows:
     the server upgrade the service to a better one.  The resultant service ID
     is returned by rxrpc_kernel_recv_data().

     intr should be set to true if the call should be interruptible.  If this
     is not set, this function may not return until a channel has been
     allocated; if it is set, the function may return -ERESTARTSYS.

     debug_id is the call debugging ID to be used for tracing.  This can be
     obtained by atomically incrementing rxrpc_debug_id.

     If this function is successful, an opaque reference to the RxRPC call is
     returned.  The caller now holds a reference on this and it must be
     properly ended.
@@ -1056,6 +1065,16 @@ The kernel interface functions are as follows:
     This value can be used to determine if the remote client has been
     restarted as it shouldn't change otherwise.

 (*) Set the maximum lifespan on a call.

	void rxrpc_kernel_set_max_life(struct socket *sock,
				       struct rxrpc_call *call,
				       unsigned long hard_timeout)

     This sets the maximum lifespan on a call to hard_timeout (which is in
     jiffies).  In the event of the timeout occurring, the call will be
     aborted and -ETIME or -ETIMEDOUT will be returned.


=======================
CONFIGURABLE PARAMETERS
+1 −1
Original line number Diff line number Diff line
@@ -251,7 +251,7 @@ struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry
	_enter("%s", cell->name);

	ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
			&result, _expiry);
			&result, _expiry, true);
	if (ret < 0) {
		_leave(" = %d [dns]", ret);
		return ERR_PTR(ret);
+3 −0
Original line number Diff line number Diff line
@@ -23,6 +23,9 @@
#define AFSPATHMAX		1024	/* Maximum length of a pathname plus NUL */
#define AFSOPAQUEMAX		1024	/* Maximum length of an opaque field */

#define AFS_VL_MAX_LIFESPAN	(120 * HZ)
#define AFS_PROBE_MAX_LIFESPAN	(30 * HZ)

typedef u64			afs_volid_t;
typedef u64			afs_vnodeid_t;
typedef u64			afs_dataversion_t;
+1 −7
Original line number Diff line number Diff line
@@ -218,14 +218,8 @@ void __afs_break_callback(struct afs_vnode *vnode)
		vnode->cb_break++;
		afs_clear_permits(vnode);

		spin_lock(&vnode->lock);

		_debug("break callback");

		if (list_empty(&vnode->granted_locks) &&
		    !list_empty(&vnode->pending_locks))
		if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
			afs_lock_may_be_available(vnode);
		spin_unlock(&vnode->lock);
	}
}

+113 −74
Original line number Diff line number Diff line
@@ -123,6 +123,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
				       const char *name, unsigned int namelen,
				       const char *addresses)
{
	struct afs_vlserver_list *vllist;
	struct afs_cell *cell;
	int i, ret;

@@ -151,18 +152,14 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,

	atomic_set(&cell->usage, 2);
	INIT_WORK(&cell->manager, afs_manage_cell);
	cell->flags = ((1 << AFS_CELL_FL_NOT_READY) |
		       (1 << AFS_CELL_FL_NO_LOOKUP_YET));
	INIT_LIST_HEAD(&cell->proc_volumes);
	rwlock_init(&cell->proc_lock);
	rwlock_init(&cell->vl_servers_lock);

	/* Fill in the VL server list if we were given a list of addresses to
	 * use.
	/* Provide a VL server list, filling it in if we were given a list of
	 * addresses to use.
	 */
	if (addresses) {
		struct afs_vlserver_list *vllist;

		vllist = afs_parse_text_addrs(net,
					      addresses, strlen(addresses), ':',
					      VL_SERVICE, AFS_VL_PORT);
@@ -171,19 +168,32 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
			goto parse_failed;
		}

		rcu_assign_pointer(cell->vl_servers, vllist);
		vllist->source = DNS_RECORD_FROM_CONFIG;
		vllist->status = DNS_LOOKUP_NOT_DONE;
		cell->dns_expiry = TIME64_MAX;
		__clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags);
	} else {
		ret = -ENOMEM;
		vllist = afs_alloc_vlserver_list(0);
		if (!vllist)
			goto error;
		vllist->source = DNS_RECORD_UNAVAILABLE;
		vllist->status = DNS_LOOKUP_NOT_DONE;
		cell->dns_expiry = ktime_get_real_seconds();
	}

	rcu_assign_pointer(cell->vl_servers, vllist);

	cell->dns_source = vllist->source;
	cell->dns_status = vllist->status;
	smp_store_release(&cell->dns_lookup_count, 1); /* vs source/status */

	_leave(" = %p", cell);
	return cell;

parse_failed:
	if (ret == -EINVAL)
		printk(KERN_ERR "kAFS: bad VL server IP address\n");
error:
	kfree(cell);
	_leave(" = %d", ret);
	return ERR_PTR(ret);
@@ -208,6 +218,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,
{
	struct afs_cell *cell, *candidate, *cursor;
	struct rb_node *parent, **pp;
	enum afs_cell_state state;
	int ret, n;

	_enter("%s,%s", name, vllist);
@@ -267,18 +278,16 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net,

wait_for_cell:
	_debug("wait_for_cell");
	ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NOT_READY, TASK_INTERRUPTIBLE);
	smp_rmb();

	switch (READ_ONCE(cell->state)) {
	case AFS_CELL_FAILED:
	wait_var_event(&cell->state,
		       ({
			       state = smp_load_acquire(&cell->state); /* vs error */
			       state == AFS_CELL_ACTIVE || state == AFS_CELL_FAILED;
		       }));

	/* Check the state obtained from the wait check. */
	if (state == AFS_CELL_FAILED) {
		ret = cell->error;
		goto error;
	default:
		_debug("weird %u %d", cell->state, cell->error);
		goto error;
	case AFS_CELL_ACTIVE:
		break;
	}

	_leave(" = %p [cell]", cell);
@@ -360,16 +369,46 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
/*
 * Update a cell's VL server address list from the DNS.
 */
static void afs_update_cell(struct afs_cell *cell)
static int afs_update_cell(struct afs_cell *cell)
{
	struct afs_vlserver_list *vllist, *old;
	struct afs_vlserver_list *vllist, *old = NULL, *p;
	unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
	unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
	time64_t now, expiry = 0;
	int ret = 0;

	_enter("%s", cell->name);

	vllist = afs_dns_query(cell, &expiry);
	if (IS_ERR(vllist)) {
		ret = PTR_ERR(vllist);

		_debug("%s: fail %d", cell->name, ret);
		if (ret == -ENOMEM)
			goto out_wake;

		ret = -ENOMEM;
		vllist = afs_alloc_vlserver_list(0);
		if (!vllist)
			goto out_wake;

		switch (ret) {
		case -ENODATA:
		case -EDESTADDRREQ:
			vllist->status = DNS_LOOKUP_GOT_NOT_FOUND;
			break;
		case -EAGAIN:
		case -ECONNREFUSED:
			vllist->status = DNS_LOOKUP_GOT_TEMP_FAILURE;
			break;
		default:
			vllist->status = DNS_LOOKUP_GOT_LOCAL_FAILURE;
			break;
		}
	}

	_debug("%s: got list %d %d", cell->name, vllist->source, vllist->status);
	cell->dns_status = vllist->status;

	now = ktime_get_real_seconds();
	if (min_ttl > max_ttl)
@@ -379,48 +418,47 @@ static void afs_update_cell(struct afs_cell *cell)
	else if (expiry > now + max_ttl)
		expiry = now + max_ttl;

	if (IS_ERR(vllist)) {
		switch (PTR_ERR(vllist)) {
		case -ENODATA:
		case -EDESTADDRREQ:
	_debug("%s: status %d", cell->name, vllist->status);
	if (vllist->source == DNS_RECORD_UNAVAILABLE) {
		switch (vllist->status) {
		case DNS_LOOKUP_GOT_NOT_FOUND:
			/* The DNS said that the cell does not exist or there
			 * weren't any addresses to be had.
			 */
			set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
			clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
			cell->dns_expiry = expiry;
			break;

		case -EAGAIN:
		case -ECONNREFUSED:
		case DNS_LOOKUP_BAD:
		case DNS_LOOKUP_GOT_LOCAL_FAILURE:
		case DNS_LOOKUP_GOT_TEMP_FAILURE:
		case DNS_LOOKUP_GOT_NS_FAILURE:
		default:
			set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
			cell->dns_expiry = now + 10;
			break;
		}

		cell->error = -EDESTADDRREQ;
	} else {
		clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
		clear_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
		cell->dns_expiry = expiry;
	}

		/* Exclusion on changing vl_addrs is achieved by a
		 * non-reentrant work item.
	/* Replace the VL server list if the new record has servers or the old
	 * record doesn't.
	 */
		old = rcu_dereference_protected(cell->vl_servers, true);
	write_lock(&cell->vl_servers_lock);
	p = rcu_dereference_protected(cell->vl_servers, true);
	if (vllist->nr_servers > 0 || p->nr_servers == 0) {
		rcu_assign_pointer(cell->vl_servers, vllist);
		cell->dns_expiry = expiry;

		if (old)
			afs_put_vlserverlist(cell->net, old);
		cell->dns_source = vllist->source;
		old = p;
	}
	write_unlock(&cell->vl_servers_lock);
	afs_put_vlserverlist(cell->net, old);

	if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
		wake_up_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET);

	now = ktime_get_real_seconds();
	afs_set_cell_timer(cell->net, cell->dns_expiry - now);
	_leave("");
out_wake:
	smp_store_release(&cell->dns_lookup_count,
			  cell->dns_lookup_count + 1); /* vs source/status */
	wake_up_var(&cell->dns_lookup_count);
	_leave(" = %d", ret);
	return ret;
}

/*
@@ -491,8 +529,7 @@ void afs_put_cell(struct afs_net *net, struct afs_cell *cell)
	now = ktime_get_real_seconds();
	cell->last_inactive = now;
	expire_delay = 0;
	if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
	    !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
	if (cell->vl_servers->nr_servers)
		expire_delay = afs_cell_gc_delay;

	if (atomic_dec_return(&cell->usage) > 1)
@@ -623,11 +660,13 @@ again:
			goto final_destruction;
		if (cell->state == AFS_CELL_FAILED)
			goto done;
		cell->state = AFS_CELL_UNSET;
		smp_store_release(&cell->state, AFS_CELL_UNSET);
		wake_up_var(&cell->state);
		goto again;

	case AFS_CELL_UNSET:
		cell->state = AFS_CELL_ACTIVATING;
		smp_store_release(&cell->state, AFS_CELL_ACTIVATING);
		wake_up_var(&cell->state);
		goto again;

	case AFS_CELL_ACTIVATING:
@@ -635,28 +674,29 @@ again:
		if (ret < 0)
			goto activation_failed;

		cell->state = AFS_CELL_ACTIVE;
		smp_wmb();
		clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
		wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
		smp_store_release(&cell->state, AFS_CELL_ACTIVE);
		wake_up_var(&cell->state);
		goto again;

	case AFS_CELL_ACTIVE:
		if (atomic_read(&cell->usage) > 1) {
			time64_t now = ktime_get_real_seconds();
			if (cell->dns_expiry <= now && net->live)
				afs_update_cell(cell);
			if (test_and_clear_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags)) {
				ret = afs_update_cell(cell);
				if (ret < 0)
					cell->error = ret;
			}
			goto done;
		}
		cell->state = AFS_CELL_DEACTIVATING;
		smp_store_release(&cell->state, AFS_CELL_DEACTIVATING);
		wake_up_var(&cell->state);
		goto again;

	case AFS_CELL_DEACTIVATING:
		set_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
		if (atomic_read(&cell->usage) > 1)
			goto reverse_deactivation;
		afs_deactivate_cell(net, cell);
		cell->state = AFS_CELL_INACTIVE;
		smp_store_release(&cell->state, AFS_CELL_INACTIVE);
		wake_up_var(&cell->state);
		goto again;

	default:
@@ -669,17 +709,13 @@ activation_failed:
	cell->error = ret;
	afs_deactivate_cell(net, cell);

	cell->state = AFS_CELL_FAILED;
	smp_wmb();
	if (test_and_clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags))
		wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
	smp_store_release(&cell->state, AFS_CELL_FAILED); /* vs error */
	wake_up_var(&cell->state);
	goto again;

reverse_deactivation:
	cell->state = AFS_CELL_ACTIVE;
	smp_wmb();
	clear_bit(AFS_CELL_FL_NOT_READY, &cell->flags);
	wake_up_bit(&cell->flags, AFS_CELL_FL_NOT_READY);
	smp_store_release(&cell->state, AFS_CELL_ACTIVE);
	wake_up_var(&cell->state);
	_leave(" [deact->act]");
	return;

@@ -739,11 +775,16 @@ void afs_manage_cells(struct work_struct *work)
		}

		if (usage == 1) {
			struct afs_vlserver_list *vllist;
			time64_t expire_at = cell->last_inactive;

			if (!test_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags) &&
			    !test_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags))
			read_lock(&cell->vl_servers_lock);
			vllist = rcu_dereference_protected(
				cell->vl_servers,
				lockdep_is_held(&cell->vl_servers_lock));
			if (vllist->nr_servers > 0)
				expire_at += afs_cell_gc_delay;
			read_unlock(&cell->vl_servers_lock);
			if (purging || expire_at <= now)
				sched_cell = true;
			else if (expire_at < next_manage)
@@ -751,10 +792,8 @@ void afs_manage_cells(struct work_struct *work)
		}

		if (!purging) {
			if (cell->dns_expiry <= now)
			if (test_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags))
				sched_cell = true;
			else if (cell->dns_expiry <= next_manage)
				next_manage = cell->dns_expiry;
		}

		if (sched_cell)
Loading