Merge branch 'for-5.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup (0011572c) · Commits · 戴 / test

Documentation/cgroup-v1/hugetlb.txt

+13 −9

Original line number	Diff line number	Diff line
		@@ -32,14 +32,18 @@ Brief summary of control files
		hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
		hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit

		For a system supporting two hugepage size (16M and 16G) the control
		For a system supporting three hugepage sizes (64k, 32M and 1G), the control
		files include:

		hugetlb.16GB.limit_in_bytes
		hugetlb.16GB.max_usage_in_bytes
		hugetlb.16GB.usage_in_bytes
		hugetlb.16GB.failcnt
		hugetlb.16MB.limit_in_bytes
		hugetlb.16MB.max_usage_in_bytes
		hugetlb.16MB.usage_in_bytes
		hugetlb.16MB.failcnt
		hugetlb.1GB.limit_in_bytes
		hugetlb.1GB.max_usage_in_bytes
		hugetlb.1GB.usage_in_bytes
		hugetlb.1GB.failcnt
		hugetlb.64KB.limit_in_bytes
		hugetlb.64KB.max_usage_in_bytes
		hugetlb.64KB.usage_in_bytes
		hugetlb.64KB.failcnt
		hugetlb.32MB.limit_in_bytes
		hugetlb.32MB.max_usage_in_bytes
		hugetlb.32MB.usage_in_bytes
		hugetlb.32MB.failcnt

include/linux/cgroup-defs.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -221,6 +221,7 @@ struct css_set {
		*/
		struct list_head tasks;
		struct list_head mg_tasks;
		struct list_head dying_tasks;

		/* all css_task_iters currently walking this cset */
		struct list_head task_iters;

include/linux/cgroup.h

+12 −2

Original line number	Diff line number	Diff line
		@@ -43,6 +43,9 @@
		/* walk all threaded css_sets in the domain */
		#define CSS_TASK_ITER_THREADED (1U << 1)

		/* internal flags */
		#define CSS_TASK_ITER_SKIPPED (1U << 16)

		/* a css_task_iter should be treated as an opaque object */
		struct css_task_iter {
		struct cgroup_subsys *ss;
		@@ -57,6 +60,7 @@ struct css_task_iter {
		struct list_head *task_pos;
		struct list_head *tasks_head;
		struct list_head *mg_tasks_head;
		struct list_head *dying_tasks_head;

		struct css_set *cur_cset;
		struct css_set *cur_dcset;
		@@ -487,7 +491,7 @@ static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
		*
		* Find the css for the (@task, @subsys_id) combination, increment a
		* reference on and return it. This function is guaranteed to return a
		* valid css.
		* valid css. The returned css may already have been offlined.
		*/
		static inline struct cgroup_subsys_state *
		task_get_css(struct task_struct *task, int subsys_id)
		@@ -497,7 +501,13 @@ task_get_css(struct task_struct *task, int subsys_id)
		rcu_read_lock();
		while (true) {
		css = task_css(task, subsys_id);
		if (likely(css_tryget_online(css)))
		/*
		* Can't use css_tryget_online() here. A task which has
		* PF_EXITING set may stay associated with an offline css.
		* If such task calls this function, css_tryget_online()
		* will keep failing.
		*/
		if (likely(css_tryget(css)))
		break;
		cpu_relax();
		}

kernel/cgroup/cgroup.c

+76 −30

Original line number	Diff line number	Diff line
		@@ -215,7 +215,8 @@ static struct cftype cgroup_base_files[];

		static int cgroup_apply_control(struct cgroup *cgrp);
		static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
		static void css_task_iter_advance(struct css_task_iter *it);
		static void css_task_iter_skip(struct css_task_iter *it,
		struct task_struct *task);
		static int cgroup_destroy_locked(struct cgroup *cgrp);
		static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
		struct cgroup_subsys *ss);
		@@ -738,6 +739,7 @@ struct css_set init_css_set = {
		.dom_cset = &init_css_set,
		.tasks = LIST_HEAD_INIT(init_css_set.tasks),
		.mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
		.dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks),
		.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
		.threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
		.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
		@@ -843,6 +845,21 @@ static void css_set_update_populated(struct css_set *cset, bool populated)
		cgroup_update_populated(link->cgrp, populated);
		}

		/*
		 * css_set_skip_task_iters - move iterators off a task leaving @cset
		 * @cset: css_set whose ->task_iters list is walked
		 * @task: task that is about to be unlinked from @cset
		 *
		 * @task is leaving, advance task iterators which are pointing to it so
		 * that they can resume at the next position. Advancing an iterator might
		 * remove it from the list, use safe walk. See css_task_iter_skip() for
		 * details.
		 */
		static void css_set_skip_task_iters(struct css_set *cset,
						    struct task_struct *task)
		{
			/* list cursors: both must be pointers to the iterated entries */
			struct css_task_iter *it, *pos;

			list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
				css_task_iter_skip(it, task);
		}

		/**
		* css_set_move_task - move a task from one css_set to another
		* @task: task being moved
		@@ -868,22 +885,9 @@ static void css_set_move_task(struct task_struct *task,
		css_set_update_populated(to_cset, true);

		if (from_cset) {
		struct css_task_iter *it, *pos;

		WARN_ON_ONCE(list_empty(&task->cg_list));

		/*
		* @task is leaving, advance task iterators which are
		* pointing to it so that they can resume at the next
		* position. Advancing an iterator might remove it from
		* the list, use safe walk. See css_task_iter_advance*()
		* for details.
		*/
		list_for_each_entry_safe(it, pos, &from_cset->task_iters,
		iters_node)
		if (it->task_pos == &task->cg_list)
		css_task_iter_advance(it);

		css_set_skip_task_iters(from_cset, task);
		list_del_init(&task->cg_list);
		if (!css_set_populated(from_cset))
		css_set_update_populated(from_cset, false);
		@@ -1210,6 +1214,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
		cset->dom_cset = cset;
		INIT_LIST_HEAD(&cset->tasks);
		INIT_LIST_HEAD(&cset->mg_tasks);
		INIT_LIST_HEAD(&cset->dying_tasks);
		INIT_LIST_HEAD(&cset->task_iters);
		INIT_LIST_HEAD(&cset->threaded_csets);
		INIT_HLIST_NODE(&cset->hlist);
		@@ -4408,15 +4413,18 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
		it->task_pos = NULL;
		return;
		}
		} while (!css_set_populated(cset));
		} while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));

		if (!list_empty(&cset->tasks))
		it->task_pos = cset->tasks.next;
		else
		else if (!list_empty(&cset->mg_tasks))
		it->task_pos = cset->mg_tasks.next;
		else
		it->task_pos = cset->dying_tasks.next;

		it->tasks_head = &cset->tasks;
		it->mg_tasks_head = &cset->mg_tasks;
		it->dying_tasks_head = &cset->dying_tasks;

		/*
		* We don't keep css_sets locked across iteration steps and thus
		@@ -4442,9 +4450,20 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
		list_add(&it->iters_node, &cset->task_iters);
		}

		/*
		 * css_task_iter_skip - step an iterator past a task that is going away
		 * @it: iterator to adjust
		 * @task: task being unlinked
		 *
		 * If @it is currently positioned on @task's cg_list node, move the raw
		 * position to the following list entry and set CSS_TASK_ITER_SKIPPED.
		 * The flag records that the position has already been stepped once;
		 * css_task_iter_advance() tests and clears it instead of stepping again.
		 * Iterators positioned elsewhere are left untouched.
		 *
		 * Must be called with css_set_lock held.
		 */
		static void css_task_iter_skip(struct css_task_iter *it,
					       struct task_struct *task)
		{
			lockdep_assert_held(&css_set_lock);

			if (it->task_pos == &task->cg_list) {
				it->task_pos = it->task_pos->next;
				it->flags |= CSS_TASK_ITER_SKIPPED;
			}
		}

		static void css_task_iter_advance(struct css_task_iter *it)
		{
		struct list_head *next;
		struct task_struct *task;

		lockdep_assert_held(&css_set_lock);
		repeat:
		@@ -4454,26 +4473,41 @@ repeat:
		* consumed first and then ->mg_tasks. After ->mg_tasks,
		* we move onto the next cset.
		*/
		next = it->task_pos->next;

		if (next == it->tasks_head)
		next = it->mg_tasks_head->next;
		if (it->flags & CSS_TASK_ITER_SKIPPED)
		it->flags &= ~CSS_TASK_ITER_SKIPPED;
		else
		it->task_pos = it->task_pos->next;

		if (next == it->mg_tasks_head)
		if (it->task_pos == it->tasks_head)
		it->task_pos = it->mg_tasks_head->next;
		if (it->task_pos == it->mg_tasks_head)
		it->task_pos = it->dying_tasks_head->next;
		if (it->task_pos == it->dying_tasks_head)
		css_task_iter_advance_css_set(it);
		else
		it->task_pos = next;
		} else {
		/* called from start, proceed to the first cset */
		css_task_iter_advance_css_set(it);
		}

		if (!it->task_pos)
		return;

		task = list_entry(it->task_pos, struct task_struct, cg_list);

		if (it->flags & CSS_TASK_ITER_PROCS) {
		/* if PROCS, skip over tasks which aren't group leaders */
		if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
		!thread_group_leader(list_entry(it->task_pos, struct task_struct,
		cg_list)))
		if (!thread_group_leader(task))
		goto repeat;

		/* and dying leaders w/o live member threads */
		if (!atomic_read(&task->signal->live))
		goto repeat;
		} else {
		/* skip all dying ones */
		if (task->flags & PF_EXITING)
		goto repeat;
		}
		}

		/**
		* css_task_iter_start - initiate task iteration
		@@ -4528,6 +4562,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)

		spin_lock_irq(&css_set_lock);

		/* @it may be half-advanced by skips, finish advancing */
		if (it->flags & CSS_TASK_ITER_SKIPPED)
		css_task_iter_advance(it);

		if (it->task_pos) {
		it->cur_task = list_entry(it->task_pos, struct task_struct,
		cg_list);
		@@ -6009,6 +6047,7 @@ void cgroup_exit(struct task_struct *tsk)
		if (!list_empty(&tsk->cg_list)) {
		spin_lock_irq(&css_set_lock);
		css_set_move_task(tsk, cset, NULL, false);
		list_add_tail(&tsk->cg_list, &cset->dying_tasks);
		cset->nr_tasks--;

		WARN_ON_ONCE(cgroup_task_frozen(tsk));
		@@ -6034,6 +6073,13 @@ void cgroup_release(struct task_struct *task)
		do_each_subsys_mask(ss, ssid, have_release_callback) {
		ss->release(task);
		} while_each_subsys_mask();

		if (use_task_css_set_links) {
		spin_lock_irq(&css_set_lock);
		css_set_skip_task_iters(task_css_set(task), task);
		list_del_init(&task->cg_list);
		spin_unlock_irq(&css_set_lock);
		}
		}

		void cgroup_free(struct task_struct *task)

kernel/cgroup/cpuset.c

+14 −1

Original line number	Diff line number	Diff line
		@@ -3254,10 +3254,23 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
		spin_unlock_irqrestore(&callback_lock, flags);
		}

		/**
		* cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
		* @tsk: pointer to task_struct with which the scheduler is struggling
		*
		* Description: In the case that the scheduler cannot find an allowed cpu in
		* tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
		* mode however, this value is the same as task_cs(tsk)->effective_cpus,
		* which will not contain a sane cpumask during cases such as cpu hotplugging.
		* This is the absolute last resort for the scheduler and it is only used if
		* _every_ other avenue has been traveled.
		**/

		void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
		{
		rcu_read_lock();
		do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
		do_set_cpus_allowed(tsk, is_in_v2_mode() ?
		task_cs(tsk)->cpus_allowed : cpu_possible_mask);
		rcu_read_unlock();

		/*

Admin message