Commit bb737bbe authored by Vivek Goyal, committed by Miklos Szeredi

virtiofs: schedule blocking async replies in separate worker

In virtiofs (unlike in regular fuse) processing of async replies is
serialized.  This can result in a deadlock in rare corner cases when
there's a circular dependency between the completion of two or more async
replies.

Such a deadlock can be reproduced with xfstests:generic/503 if TEST_DIR ==
SCRATCH_MNT (which is a misconfiguration):

 - Process A is waiting for a page lock in worker thread context and is
   blocked (virtio_fs_requests_done_work()).
 - Process B is holding that page lock and waiting for pending writes to
   finish (fuse_wait_on_page_writeback()).
 - Write requests are waiting in the virtqueue and can't complete because
   the worker thread is blocked on the page lock (process A).

Fix this by creating a unique work_struct for each async reply that can
block (O_DIRECT read).
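
The pattern is easy to model in userspace: a serialized completion loop
must never sleep on a resource owned by another, not-yet-completed
request, so completions flagged as potentially blocking get their own
worker. The sketch below is editorial and illustrative only (struct
reply, completion_loop() and worker_fn() are made-up names; pthreads
stand in for the kernel workqueue), not kernel code:

	/* Editorial userspace model, not kernel code: pthreads stand in
	 * for schedule_work(), and "may_block" mirrors args->may_block. */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct reply {
		int id;
		bool may_block;		/* set at submission time */
		pthread_t worker;	/* used only when deferred */
	};

	static void complete(struct reply *r)
	{
		/* stands in for virtio_fs_request_complete() */
		printf("reply %d completed %s\n", r->id,
		       r->may_block ? "in worker" : "inline");
	}

	static void *worker_fn(void *arg)
	{
		/* stands in for virtio_fs_complete_req_work() */
		complete(arg);
		return NULL;
	}

	/* Serialized completion loop, like virtio_fs_requests_done_work():
	 * it must never block, so blocking completions are handed off. */
	static void completion_loop(struct reply *replies, int n)
	{
		for (int i = 0; i < n; i++) {
			if (replies[i].may_block)
				pthread_create(&replies[i].worker, NULL,
					       worker_fn, &replies[i]);
			else
				complete(&replies[i]);
		}
	}

	int main(void)
	{
		struct reply replies[] = {
			{ .id = 0, .may_block = false },
			{ .id = 1, .may_block = true },	/* e.g. O_DIRECT read */
			{ .id = 2, .may_block = false },
		};

		completion_loop(replies, 3);
		pthread_join(replies[1].worker, NULL);
		return 0;
	}

Note that the fix allocates one work item per blocking reply rather than
routing everything through a second shared work item; a shared item would
only move the serialization point, while per-reply work items let any
number of blocking completions make progress independently.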

Fixes: a62a8ef9 ("virtio-fs: add virtiofs filesystem")
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent ae83d0b4
fs/fuse/file.c  +1 −0
@@ -712,6 +712,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
 	spin_unlock(&io->lock);
 
 	ia->ap.args.end = fuse_aio_complete_req;
+	ia->ap.args.may_block = io->should_dirty;
 	err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
 	if (err)
 		fuse_aio_complete_req(fc, &ia->ap.args, err);
fs/fuse/fuse_i.h  +1 −0
@@ -249,6 +249,7 @@ struct fuse_args {
 	bool out_argvar:1;
 	bool page_zeroing:1;
 	bool page_replace:1;
+	bool may_block:1;
 	struct fuse_in_arg in_args[3];
 	struct fuse_arg out_args[2];
 	void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
fs/fuse/virtio_fs.c  +71 −35
@@ -60,6 +60,12 @@ struct virtio_fs_forget {
 	struct virtio_fs_forget_req req;
 };
 
+struct virtio_fs_req_work {
+	struct fuse_req *req;
+	struct virtio_fs_vq *fsvq;
+	struct work_struct done_work;
+};
+
 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
 				 struct fuse_req *req, bool in_flight);
 
@@ -485,36 +491,16 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 }
 
 /* Work function for request completion */
-static void virtio_fs_requests_done_work(struct work_struct *work)
+static void virtio_fs_request_complete(struct fuse_req *req,
+				       struct virtio_fs_vq *fsvq)
 {
-	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
-						 done_work);
 	struct fuse_pqueue *fpq = &fsvq->fud->pq;
 	struct fuse_conn *fc = fsvq->fud->fc;
-	struct virtqueue *vq = fsvq->vq;
-	struct fuse_req *req;
-	struct fuse_args_pages *ap;
-	struct fuse_req *next;
 	struct fuse_args *args;
+	struct fuse_args_pages *ap;
 	unsigned int len, i, thislen;
 	struct page *page;
-	LIST_HEAD(reqs);
 
-	/* Collect completed requests off the virtqueue */
-	spin_lock(&fsvq->lock);
-	do {
-		virtqueue_disable_cb(vq);
-
-		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
-			spin_lock(&fpq->lock);
-			list_move_tail(&req->list, &reqs);
-			spin_unlock(&fpq->lock);
-		}
-	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
-	spin_unlock(&fsvq->lock);
-
-	/* End requests */
-	list_for_each_entry_safe(req, next, &reqs, list) {
 	/*
 	 * TODO verify that server properly follows FUSE protocol
 	 * (oh.uniq, oh.len)
@@ -540,7 +526,6 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
 
 	spin_lock(&fpq->lock);
 	clear_bit(FR_SENT, &req->flags);
-		list_del_init(&req->list);
 	spin_unlock(&fpq->lock);
 
 	fuse_request_end(fc, req);
@@ -548,6 +533,57 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
 	dec_in_flight_req(fsvq);
 	spin_unlock(&fsvq->lock);
 }
+
+static void virtio_fs_complete_req_work(struct work_struct *work)
+{
+	struct virtio_fs_req_work *w =
+		container_of(work, typeof(*w), done_work);
+
+	virtio_fs_request_complete(w->req, w->fsvq);
+	kfree(w);
+}
+
+static void virtio_fs_requests_done_work(struct work_struct *work)
+{
+	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+						 done_work);
+	struct fuse_pqueue *fpq = &fsvq->fud->pq;
+	struct virtqueue *vq = fsvq->vq;
+	struct fuse_req *req;
+	struct fuse_req *next;
+	unsigned int len;
+	LIST_HEAD(reqs);
+
+	/* Collect completed requests off the virtqueue */
+	spin_lock(&fsvq->lock);
+	do {
+		virtqueue_disable_cb(vq);
+
+		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
+			spin_lock(&fpq->lock);
+			list_move_tail(&req->list, &reqs);
+			spin_unlock(&fpq->lock);
+		}
+	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+	spin_unlock(&fsvq->lock);
+
+	/* End requests */
+	list_for_each_entry_safe(req, next, &reqs, list) {
+		list_del_init(&req->list);
+
+		/* blocking async request completes in a worker context */
+		if (req->args->may_block) {
+			struct virtio_fs_req_work *w;
+
+			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
+			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
+			w->fsvq = fsvq;
+			w->req = req;
+			schedule_work(&w->done_work);
+		} else {
+			virtio_fs_request_complete(req, fsvq);
+		}
+	}
 }
 
 /* Virtqueue interrupt handler */
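
Editorial note on the completion path above: the work item is allocated in
a context that cannot report failure to anyone, which is presumably why the
patch uses __GFP_NOFAIL (the allocation must succeed) together with GFP_NOFS
(reclaim must not recurse into the filesystem from the completion path). The
reason an async reply can block at all is the page dirtying done when an
O_DIRECT read completes; a condensed, editorial sketch of that chain
(simplified, not a verbatim quote of fs/fuse/file.c):

	static void fuse_aio_complete_req(struct fuse_conn *fc,
					  struct fuse_args *args, int err)
	{
		struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
		struct fuse_io_priv *io = ia->io;

		/* For async O_DIRECT reads io->should_dirty is set, and
		 * dirtying the destination pages goes through
		 * set_page_dirty_lock() -> lock_page(), which can sleep --
		 * hence may_block = io->should_dirty at submission time. */
		fuse_release_user_pages(&ia->ap, io->should_dirty);

		/* remainder of the completion handler elided */
	}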