aio: convert the ioctx list to table lookup v3 (db446a08) · Commits · 戴 / test

fs/aio.c

+114 −22

Original line number	Diff line number	Diff line
		@@ -66,6 +66,12 @@ struct aio_ring {

		#define AIO_RING_PAGES 8

		struct kioctx_table {
		struct rcu_head rcu;
		unsigned nr;
		struct kioctx *table[];
		};

		struct kioctx_cpu {
		unsigned reqs_available;
		};
		@@ -74,9 +80,7 @@ struct kioctx {
		struct percpu_ref users;
		atomic_t dead;

		/* This needs improving */
		unsigned long user_id;
		struct hlist_node list;

		struct __percpu kioctx_cpu *cpu;

		@@ -135,6 +139,8 @@ struct kioctx {

		struct page *internal_pages[AIO_RING_PAGES];
		struct file *aio_ring_file;

		unsigned id;
		};

		/------ sysctl variables----/
		@@ -326,7 +332,7 @@ static int aio_setup_ring(struct kioctx *ctx)

		ring = kmap_atomic(ctx->ring_pages[0]);
		ring->nr = nr_events; /* user copy */
		ring->id = ctx->user_id;
		ring->id = ~0U;
		ring->head = ring->tail = 0;
		ring->magic = AIO_RING_MAGIC;
		ring->compat_features = AIO_RING_COMPAT_FEATURES;
		@@ -462,6 +468,58 @@ static void free_ioctx_ref(struct percpu_ref *ref)
		schedule_work(&ctx->free_work);
		}

		static int ioctx_add_table(struct kioctx ctx, struct mm_struct mm)
		{
		unsigned i, new_nr;
		struct kioctx_table table, old;
		struct aio_ring *ring;

		spin_lock(&mm->ioctx_lock);
		table = rcu_dereference(mm->ioctx_table);

		while (1) {
		if (table)
		for (i = 0; i < table->nr; i++)
		if (!table->table[i]) {
		ctx->id = i;
		table->table[i] = ctx;
		spin_unlock(&mm->ioctx_lock);

		ring = kmap_atomic(ctx->ring_pages[0]);
		ring->id = ctx->id;
		kunmap_atomic(ring);
		return 0;
		}

		new_nr = (table ? table->nr : 1) * 4;

		spin_unlock(&mm->ioctx_lock);

		table = kzalloc(sizeof(table) + sizeof(struct kioctx ) *
		new_nr, GFP_KERNEL);
		if (!table)
		return -ENOMEM;

		table->nr = new_nr;

		spin_lock(&mm->ioctx_lock);
		old = rcu_dereference(mm->ioctx_table);

		if (!old) {
		rcu_assign_pointer(mm->ioctx_table, table);
		} else if (table->nr > old->nr) {
		memcpy(table->table, old->table,
		old->nr * sizeof(struct kioctx *));

		rcu_assign_pointer(mm->ioctx_table, table);
		kfree_rcu(old, rcu);
		} else {
		kfree(table);
		table = old;
		}
		}
		}

		/* ioctx_alloc
		* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
		*/
		@@ -520,6 +578,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
		ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
		BUG_ON(!ctx->req_batch);

		err = ioctx_add_table(ctx, mm);
		if (err)
		goto out_cleanup_noerr;

		/* limit the number of system wide aios */
		spin_lock(&aio_nr_lock);
		if (aio_nr + nr_events > (aio_max_nr * 2UL) \|\|
		@@ -532,17 +594,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)

		percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */

		/* now link into global list. */
		spin_lock(&mm->ioctx_lock);
		hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
		spin_unlock(&mm->ioctx_lock);

		pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
		ctx, ctx->user_id, mm, ctx->nr_events);
		return ctx;

		out_cleanup:
		err = -EAGAIN;
		out_cleanup_noerr:
		aio_free_ring(ctx);
		out_freepcpu:
		free_percpu(ctx->cpu);
		@@ -561,10 +619,18 @@ out_freectx:
		* when the processes owning a context have all exited to encourage
		* the rapid destruction of the kioctx.
		*/
		static void kill_ioctx(struct kioctx *ctx)
		static void kill_ioctx(struct mm_struct mm, struct kioctx ctx)
		{
		if (!atomic_xchg(&ctx->dead, 1)) {
		hlist_del_rcu(&ctx->list);
		struct kioctx_table *table;

		spin_lock(&mm->ioctx_lock);
		table = rcu_dereference(mm->ioctx_table);

		WARN_ON(ctx != table->table[ctx->id]);
		table->table[ctx->id] = NULL;
		spin_unlock(&mm->ioctx_lock);

		/* percpu_ref_kill() will do the necessary call_rcu() */
		wake_up_all(&ctx->wait);

		@@ -613,10 +679,28 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
		*/
		void exit_aio(struct mm_struct *mm)
		{
		struct kioctx_table *table;
		struct kioctx *ctx;
		struct hlist_node *n;
		unsigned i = 0;

		while (1) {
		rcu_read_lock();
		table = rcu_dereference(mm->ioctx_table);

		do {
		if (!table \|\| i >= table->nr) {
		rcu_read_unlock();
		rcu_assign_pointer(mm->ioctx_table, NULL);
		if (table)
		kfree(table);
		return;
		}

		ctx = table->table[i++];
		} while (!ctx);

		rcu_read_unlock();

		hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
		/*
		* We don't need to bother with munmap() here -
		* exit_mmap(mm) is coming and it'll unmap everything.
		@@ -627,7 +711,7 @@ void exit_aio(struct mm_struct *mm)
		*/
		ctx->mmap_size = 0;

		kill_ioctx(ctx);
		kill_ioctx(mm, ctx);
		}
		}

		@@ -710,19 +794,27 @@ static void kiocb_free(struct kiocb *req)

		static struct kioctx *lookup_ioctx(unsigned long ctx_id)
		{
		struct aio_ring __user ring = (void __user )ctx_id;
		struct mm_struct *mm = current->mm;
		struct kioctx ctx, ret = NULL;
		struct kioctx_table *table;
		unsigned id;

		if (get_user(id, &ring->id))
		return NULL;

		rcu_read_lock();
		table = rcu_dereference(mm->ioctx_table);

		if (!table \|\| id >= table->nr)
		goto out;

		hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
		ctx = table->table[id];
		if (ctx->user_id == ctx_id) {
		percpu_ref_get(&ctx->users);
		ret = ctx;
		break;
		}
		}

		out:
		rcu_read_unlock();
		return ret;
		}
		@@ -998,7 +1090,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
		if (!IS_ERR(ioctx)) {
		ret = put_user(ioctx->user_id, ctxp);
		if (ret)
		kill_ioctx(ioctx);
		kill_ioctx(current->mm, ioctx);
		percpu_ref_put(&ioctx->users);
		}

		@@ -1016,7 +1108,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
		{
		struct kioctx *ioctx = lookup_ioctx(ctx);
		if (likely(NULL != ioctx)) {
		kill_ioctx(ioctx);
		kill_ioctx(current->mm, ioctx);
		percpu_ref_put(&ioctx->users);
		return 0;
		}

include/linux/mm_types.h

+3 −2

Original line number	Diff line number	Diff line
		@@ -322,6 +322,7 @@ struct mm_rss_stat {
		atomic_long_t count[NR_MM_COUNTERS];
		};

		struct kioctx_table;
		struct mm_struct {
		struct vm_area_struct * mmap; /* list of VMAs */
		struct rb_root mm_rb;
		@@ -383,7 +384,7 @@ struct mm_struct {
		struct core_state core_state; / coredumping support */
		#ifdef CONFIG_AIO
		spinlock_t ioctx_lock;
		struct hlist_head ioctx_list;
		struct kioctx_table __rcu *ioctx_table;
		#endif
		#ifdef CONFIG_MM_OWNER
		/*

kernel/fork.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -522,7 +522,7 @@ static void mm_init_aio(struct mm_struct *mm)
		{
		#ifdef CONFIG_AIO
		spin_lock_init(&mm->ioctx_lock);
		INIT_HLIST_HEAD(&mm->ioctx_list);
		mm->ioctx_table = NULL;
		#endif
		}

Admin message