cpuset: rewrite update_tasks_nodemask() (3b6766fe) · Commits · 戴 / test

kernel/cpuset.c

+39 −70

Original line number	Diff line number	Diff line
		@@ -1026,6 +1026,31 @@ static void cpuset_migrate_mm(struct mm_struct mm, const nodemask_t from,
		mutex_unlock(&callback_mutex);
		}

		/*
		* Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
		* nodes if memory_migrate flag is set. Called with cgroup_mutex held.
		*/
		static void cpuset_change_nodemask(struct task_struct *p,
		struct cgroup_scanner *scan)
		{
		struct mm_struct *mm;
		struct cpuset *cs;
		int migrate;
		const nodemask_t *oldmem = scan->data;

		mm = get_task_mm(p);
		if (!mm)
		return;

		cs = cgroup_cs(scan->cg);
		migrate = is_memory_migrate(cs);

		mpol_rebind_mm(mm, &cs->mems_allowed);
		if (migrate)
		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
		mmput(mm);
		}

		static void *cpuset_being_rebound;

		/**
		@@ -1038,88 +1063,32 @@ static void *cpuset_being_rebound;
		*/
		static int update_tasks_nodemask(struct cpuset cs, const nodemask_t oldmem)
		{
		struct task_struct *p;
		struct mm_struct **mmarray;
		int i, n, ntasks;
		int migrate;
		int fudge;
		struct cgroup_iter it;
		int retval;
		struct cgroup_scanner scan;

		cpuset_being_rebound = cs; /* causes mpol_dup() rebind */

		fudge = 10; /* spare mmarray[] slots */
		fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
		retval = -ENOMEM;

		/*
		* Allocate mmarray[] to hold mm reference for each task
		* in cpuset cs. Can't kmalloc GFP_KERNEL while holding
		* tasklist_lock. We could use GFP_ATOMIC, but with a
		* few more lines of code, we can retry until we get a big
		* enough mmarray[] w/o using GFP_ATOMIC.
		*/
		while (1) {
		ntasks = cgroup_task_count(cs->css.cgroup); /* guess */
		ntasks += fudge;
		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
		if (!mmarray)
		goto done;
		read_lock(&tasklist_lock); /* block fork */
		if (cgroup_task_count(cs->css.cgroup) <= ntasks)
		break; /* got enough */
		read_unlock(&tasklist_lock); /* try again */
		kfree(mmarray);
		}

		n = 0;

		/* Load up mmarray[] with mm reference for each task in cpuset. */
		cgroup_iter_start(cs->css.cgroup, &it);
		while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
		struct mm_struct *mm;

		if (n >= ntasks) {
		printk(KERN_WARNING
		"Cpuset mempolicy rebind incomplete.\n");
		break;
		}
		mm = get_task_mm(p);
		if (!mm)
		continue;
		mmarray[n++] = mm;
		}
		cgroup_iter_end(cs->css.cgroup, &it);
		read_unlock(&tasklist_lock);
		scan.cg = cs->css.cgroup;
		scan.test_task = NULL;
		scan.process_task = cpuset_change_nodemask;
		scan.heap = NULL;
		scan.data = (nodemask_t *)oldmem;

		/*
		* Now that we've dropped the tasklist spinlock, we can
		* rebind the vma mempolicies of each mm in mmarray[] to their
		* new cpuset, and release that mm. The mpol_rebind_mm()
		* call takes mmap_sem, which we couldn't take while holding
		* tasklist_lock. Forks can happen again now - the mpol_dup()
		* cpuset_being_rebound check will catch such forks, and rebind
		* their vma mempolicies too. Because we still hold the global
		* cgroup_mutex, we know that no other rebind effort will
		* be contending for the global variable cpuset_being_rebound.
		* The mpol_rebind_mm() call takes mmap_sem, which we couldn't
		* take while holding tasklist_lock. Forks can happen - the
		* mpol_dup() cpuset_being_rebound check will catch such forks,
		* and rebind their vma mempolicies too. Because we still hold
		* the global cgroup_mutex, we know that no other rebind effort
		* will be contending for the global variable cpuset_being_rebound.
		* It's ok if we rebind the same mm twice; mpol_rebind_mm()
		* is idempotent. Also migrate pages in each mm to new nodes.
		*/
		migrate = is_memory_migrate(cs);
		for (i = 0; i < n; i++) {
		struct mm_struct *mm = mmarray[i];

		mpol_rebind_mm(mm, &cs->mems_allowed);
		if (migrate)
		cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
		mmput(mm);
		}
		retval = cgroup_scan_tasks(&scan);

		/* We're done rebinding vmas to this cpuset's new mems_allowed. */
		kfree(mmarray);
		cpuset_being_rebound = NULL;
		retval = 0;
		done:

		return retval;
		}

Admin message