Commit abde77eb authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull cgroup updates from Tejun Heo:
 "This includes Roman's cgroup2 freezer implementation.

  It's a separate machanism from cgroup1 freezer. Instead of blocking
  user tasks in arbitrary uninterruptible sleeps, the new implementation
  extends jobctl stop - frozen tasks are trapped in jobctl stop until
  thawed and can be killed and ptraced. Lots of thanks to Oleg for
  sheperding the effort.

  Other than that, there are a few trivial changes"

* 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: never call do_group_exit() with task->frozen bit set
  kernel: cgroup: fix misuse of %x
  cgroup: get rid of cgroup_freezer_frozen_exit()
  cgroup: prevent spurious transition into non-frozen state
  cgroup: Remove unused cgrp variable
  cgroup: document cgroup v2 freezer interface
  cgroup: add tracing points for cgroup v2 freezer
  cgroup: make TRACE_CGROUP_PATH irq-safe
  kselftests: cgroup: add freezer controller self-tests
  kselftests: cgroup: don't fail on cg_kill_all() error in cg_destroy()
  cgroup: cgroup v2 freezer
  cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock
  cgroup: implement __cgroup_task_count() helper
  cgroup: rename freezer.c into legacy_freezer.c
  cgroup: remove extra cgroup_migrate_finish() call
parents 23c97060 f2b31bb5
Loading
Loading
Loading
Loading
+27 −0
Original line number Diff line number Diff line
@@ -864,6 +864,8 @@ All cgroup core files are prefixed with "cgroup."
	  populated
		1 if the cgroup or its descendants contains any live
		processes; otherwise, 0.
	  frozen
		1 if the cgroup is frozen; otherwise, 0.

  cgroup.max.descendants
	A read-write single value files.  The default is "max".
@@ -897,6 +899,31 @@ All cgroup core files are prefixed with "cgroup."
		A dying cgroup can consume system resources not exceeding
		limits, which were active at the moment of cgroup deletion.

  cgroup.freeze
	A read-write single value file which exists on non-root cgroups.
	Allowed values are "0" and "1". The default is "0".

	Writing "1" to the file causes freezing of the cgroup and all
	descendant cgroups. This means that all belonging processes will
	be stopped and will not run until the cgroup will be explicitly
	unfrozen. Freezing of the cgroup may take some time; when this action
	is completed, the "frozen" value in the cgroup.events control file
	will be updated to "1" and the corresponding notification will be
	issued.

	A cgroup can be frozen either by its own settings, or by settings
	of any ancestor cgroups. If any of ancestor cgroups is frozen, the
	cgroup will remain frozen.

	Processes in the frozen cgroup can be killed by a fatal signal.
	They also can enter and leave a frozen cgroup: either by an explicit
	move by a user, or if freezing of the cgroup races with fork().
	If a process is moved to a frozen cgroup, it stops. If a process is
	moved out of a frozen cgroup, it becomes running.

	Frozen status of a cgroup doesn't affect any cgroup tree operations:
	it's possible to delete a frozen (and empty) cgroup, as well as
	create new sub-cgroups.

Controllers
===========
+33 −0
Original line number Diff line number Diff line
@@ -65,6 +65,12 @@ enum {
	 * specified at mount time and thus is implemented here.
	 */
	CGRP_CPUSET_CLONE_CHILDREN,

	/* Control group has to be frozen. */
	CGRP_FREEZE,

	/* Cgroup is frozen. */
	CGRP_FROZEN,
};

/* cgroup_root->flags */
@@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
	struct cgroup *updated_next;		/* NULL iff not on the list */
};

struct cgroup_freezer_state {
	/* Should the cgroup and its descendants be frozen. */
	bool freeze;

	/* Should the cgroup actually be frozen? */
	int e_freeze;

	/* Fields below are protected by css_set_lock */

	/* Number of frozen descendant cgroups */
	int nr_frozen_descendants;

	/*
	 * Number of tasks, which are counted as frozen:
	 * frozen, SIGSTOPped, and PTRACEd.
	 */
	int nr_frozen_tasks;
};

struct cgroup {
	/* self css with NULL ->ss, points back to this cgroup */
	struct cgroup_subsys_state self;
@@ -349,6 +374,11 @@ struct cgroup {
	 * Dying cgroups are cgroups which were deleted by a user,
	 * but are still existing because someone else is holding a reference.
	 * max_descendants is a maximum allowed number of descent cgroups.
	 *
	 * nr_descendants and nr_dying_descendants are protected
	 * by cgroup_mutex and css_set_lock. It's fine to read them holding
	 * any of cgroup_mutex and css_set_lock; for writing both locks
	 * should be held.
	 */
	int nr_descendants;
	int nr_dying_descendants;
@@ -448,6 +478,9 @@ struct cgroup {
	/* If there is block congestion on this cgroup. */
	atomic_t congestion_count;

	/* Used to store internal freezer state */
	struct cgroup_freezer_state freezer;

	/* ids of the ancestors at each level including self */
	int ancestor_ids[];
};
+43 −0
Original line number Diff line number Diff line
@@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
		free_cgroup_ns(ns);
}

#ifdef CONFIG_CGROUPS

void cgroup_enter_frozen(void);
void cgroup_leave_frozen(bool always_leave);
void cgroup_update_frozen(struct cgroup *cgrp);
void cgroup_freeze(struct cgroup *cgrp, bool freeze);
void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
				 struct cgroup *dst);

static inline bool cgroup_task_freeze(struct task_struct *task)
{
	bool ret;

	if (task->flags & PF_KTHREAD)
		return false;

	rcu_read_lock();
	ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
	rcu_read_unlock();

	return ret;
}

static inline bool cgroup_task_frozen(struct task_struct *task)
{
	return task->frozen;
}

#else /* !CONFIG_CGROUPS */

static inline void cgroup_enter_frozen(void) { }
static inline void cgroup_leave_frozen(bool always_leave) { }
static inline bool cgroup_task_freeze(struct task_struct *task)
{
	return false;
}
static inline bool cgroup_task_frozen(struct task_struct *task)
{
	return false;
}

#endif /* !CONFIG_CGROUPS */

#endif /* _LINUX_CGROUP_H */
+2 −0
Original line number Diff line number Diff line
@@ -726,6 +726,8 @@ struct task_struct {
#ifdef CONFIG_CGROUPS
	/* disallow userland-initiated cgroup migration */
	unsigned			no_cgroup_migration:1;
	/* task is frozen/stopped (used by the cgroup freezer) */
	unsigned			frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP
	/* to be used once the psi infrastructure lands upstream. */
+2 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ struct task_struct;
#define JOBCTL_TRAP_NOTIFY_BIT	20	/* trap for NOTIFY */
#define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
#define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
#define JOBCTL_TRAP_FREEZE_BIT	23	/* trap for cgroup freezer */

#define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
@@ -26,6 +27,7 @@ struct task_struct;
#define JOBCTL_TRAP_NOTIFY	(1UL << JOBCTL_TRAP_NOTIFY_BIT)
#define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_FREEZE	(1UL << JOBCTL_TRAP_FREEZE_BIT)

#define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
Loading