Commit 37d4e84f authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ceph-for-5.5-rc2' of git://github.com/ceph/ceph-client

Pull ceph fixes from Ilya Dryomov:
 "A fix to avoid a corner case when scheduling cap reclaim in batches
  from Xiubo, a patch to add some observability into cap waiters from
  Jeff and a couple of cleanups"

* tag 'ceph-for-5.5-rc2' of git://github.com/ceph/ceph-client:
  ceph: add more debug info when decoding mdsmap
  ceph: switch to global cap helper
  ceph: trigger the reclaim work once there has enough pending caps
  ceph: show tasks waiting on caps in debugfs caps file
  ceph: convert int fields in ceph_mount_options to unsigned int
parents ae4b064e da08e1e1
Loading
Loading
Loading
Loading
+27 −14
Original line number Diff line number Diff line
@@ -1011,18 +1011,13 @@ static int __ceph_is_single_caps(struct ceph_inode_info *ci)
	return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
}

static int __ceph_is_any_caps(struct ceph_inode_info *ci)
{
	return !RB_EMPTY_ROOT(&ci->i_caps);
}

int ceph_is_any_caps(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int ret;

	spin_lock(&ci->i_ceph_lock);
	ret = __ceph_is_any_caps(ci);
	ret = __ceph_is_any_real_caps(ci);
	spin_unlock(&ci->i_ceph_lock);

	return ret;
@@ -1099,16 +1094,17 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
	if (removed)
		ceph_put_cap(mdsc, cap);

	if (!__ceph_is_any_real_caps(ci)) {
		/* when reconnect denied, we remove session caps forcibly,
		 * i_wr_ref can be non-zero. If there are ongoing write,
		 * keep i_snap_realm.
		 */
	if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
		if (ci->i_wr_ref == 0 && ci->i_snap_realm)
			drop_inode_snap_realm(ci);

	if (!__ceph_is_any_real_caps(ci))
		__cap_delay_cancel(mdsc, ci);
	}
}

struct cap_msg_args {
	struct ceph_mds_session	*session;
@@ -2764,7 +2760,19 @@ int ceph_get_caps(struct file *filp, int need, int want,
		if (ret == -EAGAIN)
			continue;
		if (!ret) {
			struct ceph_mds_client *mdsc = fsc->mdsc;
			struct cap_wait cw;
			DEFINE_WAIT_FUNC(wait, woken_wake_function);

			cw.ino = inode->i_ino;
			cw.tgid = current->tgid;
			cw.need = need;
			cw.want = want;

			spin_lock(&mdsc->caps_list_lock);
			list_add(&cw.list, &mdsc->cap_wait_list);
			spin_unlock(&mdsc->caps_list_lock);

			add_wait_queue(&ci->i_cap_wq, &wait);

			flags |= NON_BLOCKING;
@@ -2778,6 +2786,11 @@ int ceph_get_caps(struct file *filp, int need, int want,
			}

			remove_wait_queue(&ci->i_cap_wq, &wait);

			spin_lock(&mdsc->caps_list_lock);
			list_del(&cw.list);
			spin_unlock(&mdsc->caps_list_lock);

			if (ret == -EAGAIN)
				continue;
		}
@@ -2928,7 +2941,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
				ci->i_head_snapc = NULL;
			}
			/* see comment in __ceph_remove_cap() */
			if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
				drop_inode_snap_realm(ci);
		}
	spin_unlock(&ci->i_ceph_lock);
+13 −0
Original line number Diff line number Diff line
@@ -139,6 +139,7 @@ static int caps_show(struct seq_file *s, void *p)
	struct ceph_fs_client *fsc = s->private;
	struct ceph_mds_client *mdsc = fsc->mdsc;
	int total, avail, used, reserved, min, i;
	struct cap_wait	*cw;

	ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
	seq_printf(s, "total\t\t%d\n"
@@ -166,6 +167,18 @@ static int caps_show(struct seq_file *s, void *p)
	}
	mutex_unlock(&mdsc->mutex);

	seq_printf(s, "\n\nWaiters:\n--------\n");
	seq_printf(s, "tgid         ino                need             want\n");
	seq_printf(s, "-----------------------------------------------------\n");

	spin_lock(&mdsc->caps_list_lock);
	list_for_each_entry(cw, &mdsc->cap_wait_list, list) {
		seq_printf(s, "%-13d0x%-17lx%-17s%-17s\n", cw->tgid, cw->ino,
				ceph_cap_string(cw->need),
				ceph_cap_string(cw->want));
	}
	spin_unlock(&mdsc->caps_list_lock);

	return 0;
}

+5 −3
Original line number Diff line number Diff line
@@ -2015,7 +2015,7 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
	if (!nr)
		return;
	val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
	if (!(val % CEPH_CAPS_PER_RELEASE)) {
	if ((val % CEPH_CAPS_PER_RELEASE) < nr) {
		atomic_set(&mdsc->cap_reclaim_pending, 0);
		ceph_queue_cap_reclaim_work(mdsc);
	}
@@ -2032,12 +2032,13 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
	struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
	size_t size = sizeof(struct ceph_mds_reply_dir_entry);
	int order, num_entries;
	unsigned int num_entries;
	int order;

	spin_lock(&ci->i_ceph_lock);
	num_entries = ci->i_files + ci->i_subdirs;
	spin_unlock(&ci->i_ceph_lock);
	num_entries = max(num_entries, 1);
	num_entries = max(num_entries, 1U);
	num_entries = min(num_entries, opt->max_readdir);

	order = get_order(size * num_entries);
@@ -4168,6 +4169,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
	INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
	mdsc->last_renew_caps = jiffies;
	INIT_LIST_HEAD(&mdsc->cap_delay_list);
	INIT_LIST_HEAD(&mdsc->cap_wait_list);
	spin_lock_init(&mdsc->cap_delay_lock);
	INIT_LIST_HEAD(&mdsc->snap_flush_list);
	spin_lock_init(&mdsc->snap_flush_lock);
+9 −0
Original line number Diff line number Diff line
@@ -340,6 +340,14 @@ struct ceph_quotarealm_inode {
	struct inode *inode;
};

struct cap_wait {
	struct list_head	list;
	unsigned long		ino;
	pid_t			tgid;
	int			need;
	int			want;
};

/*
 * mds client state
 */
@@ -416,6 +424,7 @@ struct ceph_mds_client {
	spinlock_t	caps_list_lock;
	struct		list_head caps_list; /* unused (reserved or
						unreserved) */
	struct		list_head cap_wait_list;
	int		caps_total_count;    /* total caps allocated */
	int		caps_use_count;      /* in use */
	int		caps_use_max;	     /* max used caps */
+8 −4
Original line number Diff line number Diff line
@@ -158,6 +158,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
		void *pexport_targets = NULL;
		struct ceph_timespec laggy_since;
		struct ceph_mds_info *info;
		bool laggy;

		ceph_decode_need(p, end, sizeof(u64) + 1, bad);
		global_id = ceph_decode_64(p);
@@ -190,6 +191,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
		if (err)
			goto corrupt;
		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
		laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
		*p += sizeof(u32);
		ceph_decode_32_safe(p, end, namelen, bad);
		*p += namelen;
@@ -207,10 +209,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
			*p = info_end;
		}

		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
		dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n",
		     i+1, n, global_id, mds, inc,
		     ceph_pr_addr(&addr),
		     ceph_mds_state_name(state));
		     ceph_mds_state_name(state),
		     laggy ? "(laggy)" : "");

		if (mds < 0 || state <= 0)
			continue;
@@ -230,8 +233,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
		info->global_id = global_id;
		info->state = state;
		info->addr = addr;
		info->laggy = (laggy_since.tv_sec != 0 ||
			       laggy_since.tv_nsec != 0);
		info->laggy = laggy;
		info->num_export_targets = num_export_targets;
		if (num_export_targets) {
			info->export_targets = kcalloc(num_export_targets,
@@ -355,6 +357,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
		m->m_damaged = false;
	}
bad_ext:
	dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
	     !!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
	*p = end;
	dout("mdsmap_decode success epoch %u\n", m->m_epoch);
	return m;
Loading