Commit e97b71de authored by Linus Torvalds
Browse files
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: fix ioctl magic
  ceph: Behave better when handling file lock replies.
  ceph: pass lock information by struct file_lock instead of as individual params.
  ceph: Handle file locks in replies from the MDS.
  ceph: avoid possible null deref in readdir after dir llseek
parents 38971ce2 1cd275f6
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -114,8 +114,8 @@ static int __dcache_readdir(struct file *filp,
	spin_lock(&dcache_lock);

	/* start at beginning? */
	if (filp->f_pos == 2 || (last &&
				 filp->f_pos < ceph_dentry(last)->offset)) {
	if (filp->f_pos == 2 || last == NULL ||
	    filp->f_pos < ceph_dentry(last)->offset) {
		if (list_empty(&parent->d_subdirs))
			goto out_unlock;
		p = parent->d_subdirs.prev;
+1 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@
#include <linux/ioctl.h>
#include <linux/types.h>

#define CEPH_IOCTL_MAGIC 0x98
#define CEPH_IOCTL_MAGIC 0x97

/* just use u64 to align sanely on all archs */
struct ceph_ioctl_layout {
+50 −44
Original line number Diff line number Diff line
@@ -11,40 +11,68 @@
 * Implement fcntl and flock locking functions.
 */
static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
			     u64 pid, u64 pid_ns,
			     int cmd, u64 start, u64 length, u8 wait)
			     int cmd, u8 wait, struct file_lock *fl)
{
	struct inode *inode = file->f_dentry->d_inode;
	struct ceph_mds_client *mdsc =
		ceph_sb_to_client(inode->i_sb)->mdsc;
	struct ceph_mds_request *req;
	int err;
	u64 length = 0;

	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
	if (IS_ERR(req))
		return PTR_ERR(req);
	req->r_inode = igrab(inode);

	/* mds requires start and length rather than start and end */
	if (LLONG_MAX == fl->fl_end)
		length = 0;
	else
		length = fl->fl_end - fl->fl_start + 1;

	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
	     "length: %llu, wait: %d, type`: %d", (int)lock_type,
	     (int)operation, pid, start, length, wait, cmd);
	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
	     length, wait, fl->fl_type);


	req->r_args.filelock_change.rule = lock_type;
	req->r_args.filelock_change.type = cmd;
	req->r_args.filelock_change.pid = cpu_to_le64(pid);
	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
	/* This should be adjusted, but I'm not sure if
	   namespaces actually get id numbers*/
	req->r_args.filelock_change.pid_namespace =
		cpu_to_le64((u64)pid_ns);
	req->r_args.filelock_change.start = cpu_to_le64(start);
		cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
	req->r_args.filelock_change.length = cpu_to_le64(length);
	req->r_args.filelock_change.wait = wait;

	err = ceph_mdsc_do_request(mdsc, inode, req);

	if ( operation == CEPH_MDS_OP_GETFILELOCK){
		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
			fl->fl_type = F_RDLCK;
		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
			fl->fl_type = F_WRLCK;
		else
			fl->fl_type = F_UNLCK;

		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
						 le64_to_cpu(req->r_reply_info.filelock_reply->length);
		if (length >= 1)
			fl->fl_end = length -1;
		else
			fl->fl_end = 0;

	}
	ceph_mdsc_put_request(req);
	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
	     "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type,
	     (int)operation, pid, start, length, wait, cmd, err);
	     "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
	     length, wait, fl->fl_type, err);
	return err;
}

@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
 */
int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
{
	u64 length;
	u8 lock_cmd;
	int err;
	u8 wait = 0;
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
	else
		lock_cmd = CEPH_LOCK_UNLOCK;

	if (LLONG_MAX == fl->fl_end)
		length = 0;
	else
		length = fl->fl_end - fl->fl_start + 1;

	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
				(u64)fl->fl_pid,
				(u64)(unsigned long)fl->fl_nspid,
				lock_cmd, fl->fl_start,
				length, wait);
	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
	if (!err) {
		if ( op != CEPH_MDS_OP_GETFILELOCK ){
			dout("mds locked, locking locally");
			err = posix_lock_file(file, fl, NULL);
			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
				/* undo! This should only happen if the kernel detects
				 * local deadlock. */
				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
					  (u64)fl->fl_pid,
					  (u64)(unsigned long)fl->fl_nspid,
					  CEPH_LOCK_UNLOCK, fl->fl_start,
					  length, 0);
						  CEPH_LOCK_UNLOCK, 0, fl);
				dout("got %d on posix_lock_file, undid lock", err);
			}
		}

	} else {
		dout("mds returned error code %d", err);
	}
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)

int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
{
	u64 length;
	u8 lock_cmd;
	int err;
	u8 wait = 1;
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
		lock_cmd = CEPH_LOCK_EXCL;
	else
		lock_cmd = CEPH_LOCK_UNLOCK;
	/* mds requires start and length rather than start and end */
	if (LLONG_MAX == fl->fl_end)
		length = 0;
	else
		length = fl->fl_end - fl->fl_start + 1;

	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
				file, (u64)fl->fl_pid,
				(u64)(unsigned long)fl->fl_nspid,
				lock_cmd, fl->fl_start,
				length, wait);
				file, lock_cmd, wait, fl);
	if (!err) {
		err = flock_lock_file_wait(file, fl);
		if (err) {
			ceph_lock_message(CEPH_LOCK_FLOCK,
					  CEPH_MDS_OP_SETFILELOCK,
					  file, (u64)fl->fl_pid,
					  (u64)(unsigned long)fl->fl_nspid,
					  CEPH_LOCK_UNLOCK, fl->fl_start,
					  length, 0);
					  file, CEPH_LOCK_UNLOCK, 0, fl);
			dout("got %d on flock_lock_file_wait, undid lock", err);
		}
	} else {
+37 −4
Original line number Diff line number Diff line
@@ -201,6 +201,38 @@ out_bad:
	return err;
}

/*
 * Parse the file lock payload of an fcntl F_GETLK reply.
 *
 * The region [*p, end) has to hold exactly one *info->filelock_reply
 * record.  info->filelock_reply is pointed at the record in place (nothing
 * is copied) and *p is moved past it.
 *
 * Returns 0 on success, or -EIO when the region is too short for the
 * record or when bytes remain after it.
 */
static int parse_reply_info_filelock(void **p, void *end,
                struct ceph_mds_reply_info_parsed *info)
{
	void *next = *p + sizeof(*info->filelock_reply);

	/* the record would run past the end of the region */
	if (next > end)
		return -EIO;

	info->filelock_reply = *p;
	*p = next;

	/* leftover bytes after the record mean a malformed reply */
	if (unlikely(next != end))
		return -EIO;

	return 0;
}

/*
 * Parse the op-specific "extra" section of an mds reply.
 *
 * A CEPH_MDS_OP_GETFILELOCK reply is handed to
 * parse_reply_info_filelock(); a reply to any other op is handed to
 * parse_reply_info_dir().  The selected parser's return value is passed
 * back unchanged.
 */
static int parse_reply_info_extra(void **p, void *end,
                struct ceph_mds_reply_info_parsed *info)
{
	if (info->head->op != CEPH_MDS_OP_GETFILELOCK)
		return parse_reply_info_dir(p, end, info);

	return parse_reply_info_filelock(p, end, info);
}

/*
 * parse entire mds reply
 */
@@ -223,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg,
			goto out_bad;
	}

	/* dir content */
	/* extra */
	ceph_decode_32_safe(&p, end, len, bad);
	if (len > 0) {
		err = parse_reply_info_dir(&p, p+len, info);
		err = parse_reply_info_extra(&p, p+len, info);
		if (err < 0)
			goto out_bad;
	}
@@ -2074,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)

	mutex_lock(&session->s_mutex);
	if (err < 0) {
		pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds);
		pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
		ceph_msg_dump(msg);
		goto out_err;
	}
@@ -2094,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
	mutex_lock(&req->r_fill_mutex);
	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
	if (err == 0) {
		if (result == 0 && rinfo->dir_nr)
		if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
		    rinfo->dir_nr)
			ceph_readdir_prepopulate(req, req->r_session);
		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
	}
+21 −10
Original line number Diff line number Diff line
@@ -42,19 +42,28 @@ struct ceph_mds_reply_info_in {
};

/*
 * parsed info about an mds reply, including information about the
 * target inode and/or its parent directory and dentry, and directory
 * contents (for readdir results).
 * parsed info about an mds reply, including information about
 * either: 1) the target inode and/or its parent directory and dentry,
 * and directory contents (for readdir results), or
 * 2) the file range lock info (for fcntl F_GETLK results).
 */
struct ceph_mds_reply_info_parsed {
	struct ceph_mds_reply_head    *head;

	/* trace */
	struct ceph_mds_reply_info_in diri, targeti;
	struct ceph_mds_reply_dirfrag *dirfrag;
	char                          *dname;
	u32                           dname_len;
	struct ceph_mds_reply_lease   *dlease;

	/* extra */
	union {
		/* for fcntl F_GETLK results */
		struct ceph_filelock *filelock_reply;

		/* for readdir results */
		struct {
			struct ceph_mds_reply_dirfrag *dir_dir;
			int                           dir_nr;
			char                          **dir_dname;
@@ -62,6 +71,8 @@ struct ceph_mds_reply_info_parsed {
			struct ceph_mds_reply_lease   **dir_dlease;
			struct ceph_mds_reply_info_in *dir_in;
			u8                            dir_complete, dir_end;
		};
	};

	/* encoded blob describing snapshot contexts for certain
	   operations (e.g., open) */