Commit 794a56eb authored by Juri Lelli, committed by Ingo Molnar

sched/cpufreq: Change the worker kthread to SCHED_DEADLINE



The worker kthread needs to be able to change frequency on behalf of
all other threads, so it must not itself be starved by the threads it
serves.

Make it special: a SCHED_DEADLINE entity ranked just under the STOP class.

Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Claudio Scordino <claudio@evidence.eu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luca Abeni <luca.abeni@santannapisa.it>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: alessio.balsini@arm.com
Cc: bristot@redhat.com
Cc: dietmar.eggemann@arm.com
Cc: joelaf@google.com
Cc: juri.lelli@redhat.com
Cc: mathieu.poirier@linaro.org
Cc: morten.rasmussen@arm.com
Cc: patrick.bellasi@arm.com
Cc: rjw@rjwysocki.net
Cc: rostedt@goodmis.org
Cc: tkjos@android.com
Cc: tommaso.cucinotta@santannapisa.it
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/20171204102325.5110-4-juri.lelli@redhat.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent e0367b12
include/linux/sched.h  +1 −0
@@ -1431,6 +1431,7 @@ extern int idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
 extern int sched_setattr(struct task_struct *, const struct sched_attr *);
+extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
 extern struct task_struct *idle_task(int cpu);
 
 /**
kernel/sched/core.c  +11 −2
@@ -4085,7 +4085,7 @@ recheck:
 			return -EINVAL;
 	}
 
-	if (attr->sched_flags & ~SCHED_FLAG_ALL)
+	if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV))
 		return -EINVAL;
 
 	/*
@@ -4152,6 +4152,9 @@ recheck:
 	}
 
 	if (user) {
+		if (attr->sched_flags & SCHED_FLAG_SUGOV)
+			return -EINVAL;
+
 		retval = security_task_setscheduler(p);
 		if (retval)
 			return retval;
@@ -4207,7 +4210,8 @@ change:
 		}
 #endif
 #ifdef CONFIG_SMP
-		if (dl_bandwidth_enabled() && dl_policy(policy)) {
+		if (dl_bandwidth_enabled() && dl_policy(policy) &&
+				!(attr->sched_flags & SCHED_FLAG_SUGOV)) {
 			cpumask_t *span = rq->rd->span;
 
 			/*
@@ -4337,6 +4341,11 @@ int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
 }
 EXPORT_SYMBOL_GPL(sched_setattr);
 
+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
+{
+	return __sched_setscheduler(p, attr, false, true);
+}
+
 /**
  * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
  * @p: the task in question.
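In short, the core.c hunks gate the new flag: SCHED_FLAG_SUGOV becomes a recognized flag, but only the kernel-internal path (sched_setattr_nocheck()) may set it; user-initiated sched_setattr() calls carrying it get -EINVAL. A minimal userspace sketch of that gating logic follows; the struct, flag values and function names are simplified stand-ins invented for illustration, not the kernel's real ABI:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in flag values, loosely mirroring the patch. */
#define SCHED_FLAG_RESET_ON_FORK	0x01ULL
#define SCHED_FLAG_RECLAIM		0x02ULL
#define SCHED_FLAG_ALL	(SCHED_FLAG_RESET_ON_FORK | SCHED_FLAG_RECLAIM)
#define SCHED_FLAG_SUGOV		0x10000000ULL

struct fake_attr { unsigned long long sched_flags; };

/* Models __sched_setscheduler(): @user is true for syscall callers. */
static int setscheduler(const struct fake_attr *attr, bool user)
{
	/* Unknown flags are still rejected; SUGOV is now a known one. */
	if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV))
		return -22;			/* -EINVAL */

	/* ...but SUGOV itself is reserved for in-kernel callers. */
	if (user && (attr->sched_flags & SCHED_FLAG_SUGOV))
		return -22;			/* -EINVAL */

	return 0;
}

int main(void)
{
	struct fake_attr sugov = { .sched_flags = SCHED_FLAG_SUGOV };

	printf("kernel path: %d\n", setscheduler(&sugov, false));	/* 0 */
	printf("user path:   %d\n", setscheduler(&sugov, true));	/* -22 */
	return 0;
}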
kernel/sched/cpufreq_schedutil.c  +16 −3
@@ -474,7 +474,20 @@ static void sugov_policy_free(struct sugov_policy *sg_policy)
 static int sugov_kthread_create(struct sugov_policy *sg_policy)
 {
 	struct task_struct *thread;
-	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+	struct sched_attr attr = {
+		.size = sizeof(struct sched_attr),
+		.sched_policy = SCHED_DEADLINE,
+		.sched_flags = SCHED_FLAG_SUGOV,
+		.sched_nice = 0,
+		.sched_priority = 0,
+		/*
+		 * Fake (unused) bandwidth; workaround to "fix"
+		 * priority inheritance.
+		 */
+		.sched_runtime	=  1000000,
+		.sched_deadline = 10000000,
+		.sched_period	= 10000000,
+	};
 	struct cpufreq_policy *policy = sg_policy->policy;
 	int ret;
 
@@ -492,10 +505,10 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
 		return PTR_ERR(thread);
 	}
 
-	ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param);
+	ret = sched_setattr_nocheck(thread, &attr);
 	if (ret) {
 		kthread_stop(thread);
-		pr_warn("%s: failed to set SCHED_FIFO\n", __func__);
+		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
 		return ret;
 	}
 
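The "fake" parameters above request 1 ms of runtime every 10 ms, a nominal 10% bandwidth; because a SUGOV entity skips runtime accounting and admission control (see the deadline.c hunks below), these numbers never actually constrain the kthread. A quick sanity check of that ratio, using the same Q20 fixed point as the kernel's to_ratio() (BW_SHIFT = 20); the program is illustrative arithmetic only:

#include <stdio.h>

#define BW_SHIFT	20	/* matches the kernel's bandwidth fixed point */

/* Same idea as the kernel's to_ratio(): runtime/period in Q20. */
static unsigned long long to_ratio(unsigned long long period,
				   unsigned long long runtime)
{
	if (period == 0)
		return 0;
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	/* The sugov kthread's (unused) parameters, in nanoseconds. */
	unsigned long long runtime = 1000000, period = 10000000;
	unsigned long long bw = to_ratio(period, runtime);

	printf("bw = %llu / %d (~%.1f%%)\n", bw, 1 << BW_SHIFT,
	       100.0 * bw / (1 << BW_SHIFT));	/* ~10.0% */
	return 0;
}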
kernel/sched/deadline.c  +73 −30
@@ -78,7 +78,7 @@ static inline int dl_bw_cpus(int i)
 #endif
 
 static inline
-void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
 	u64 old = dl_rq->running_bw;
 
@@ -91,7 +91,7 @@ void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 }
 
 static inline
-void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
 	u64 old = dl_rq->running_bw;
 
@@ -105,7 +105,7 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 }
 
 static inline
-void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
 	u64 old = dl_rq->this_bw;
 
@@ -115,7 +115,7 @@ void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 }
 
 static inline
-void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
 	u64 old = dl_rq->this_bw;
 
@@ -127,16 +127,46 @@ void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
 }
 
+static inline
+void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+	if (!dl_entity_is_special(dl_se))
+		__add_rq_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+	if (!dl_entity_is_special(dl_se))
+		__sub_rq_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+	if (!dl_entity_is_special(dl_se))
+		__add_running_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+	if (!dl_entity_is_special(dl_se))
+		__sub_running_bw(dl_se->dl_bw, dl_rq);
+}
+
 void dl_change_utilization(struct task_struct *p, u64 new_bw)
 {
 	struct rq *rq;
 
+	BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
+
 	if (task_on_rq_queued(p))
 		return;
 
 	rq = task_rq(p);
 	if (p->dl.dl_non_contending) {
-		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		sub_running_bw(&p->dl, &rq->dl);
 		p->dl.dl_non_contending = 0;
 		/*
 		 * If the timer handler is currently running and the
@@ -148,8 +178,8 @@ void dl_change_utilization(struct task_struct *p, u64 new_bw)
 		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
 			put_task_struct(p);
 	}
-	sub_rq_bw(p->dl.dl_bw, &rq->dl);
-	add_rq_bw(new_bw, &rq->dl);
+	__sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	__add_rq_bw(new_bw, &rq->dl);
 }
 
 /*
@@ -221,6 +251,9 @@ static void task_non_contending(struct task_struct *p)
 	if (dl_se->dl_runtime == 0)
 		return;
 
+	if (dl_entity_is_special(dl_se))
+		return;
+
 	WARN_ON(hrtimer_active(&dl_se->inactive_timer));
 	WARN_ON(dl_se->dl_non_contending);
 
@@ -240,12 +273,12 @@ static void task_non_contending(struct task_struct *p)
 	 */
 	if (zerolag_time < 0) {
 		if (dl_task(p))
-			sub_running_bw(dl_se->dl_bw, dl_rq);
+			sub_running_bw(dl_se, dl_rq);
 		if (!dl_task(p) || p->state == TASK_DEAD) {
 			struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
 			if (p->state == TASK_DEAD)
-				sub_rq_bw(p->dl.dl_bw, &rq->dl);
+				sub_rq_bw(&p->dl, &rq->dl);
 			raw_spin_lock(&dl_b->lock);
 			__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
 			__dl_clear_params(p);
@@ -272,7 +305,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
 		return;
 
 	if (flags & ENQUEUE_MIGRATED)
-		add_rq_bw(dl_se->dl_bw, dl_rq);
+		add_rq_bw(dl_se, dl_rq);
 
 	if (dl_se->dl_non_contending) {
 		dl_se->dl_non_contending = 0;
@@ -293,7 +326,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
 		 * when the "inactive timer" fired).
 		 * So, add it back.
 		 */
-		add_running_bw(dl_se->dl_bw, dl_rq);
+		add_running_bw(dl_se, dl_rq);
 	}
 }
 
@@ -1149,6 +1182,9 @@ static void update_curr_dl(struct rq *rq)
 
 	sched_rt_avg_update(rq, delta_exec);
 
+	if (dl_entity_is_special(dl_se))
+		return;
+
 	if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM))
 		delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);
 	dl_se->runtime -= delta_exec;
@@ -1211,8 +1247,8 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
 		struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
 		if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
-			sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
-			sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+			sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
+			sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
 			dl_se->dl_non_contending = 0;
 		}
 
@@ -1229,7 +1265,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
 	sched_clock_tick();
 	update_rq_clock(rq);
 
-	sub_running_bw(dl_se->dl_bw, &rq->dl);
+	sub_running_bw(dl_se, &rq->dl);
 	dl_se->dl_non_contending = 0;
 unlock:
 	task_rq_unlock(rq, p, &rf);
@@ -1423,8 +1459,8 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 		dl_check_constrained_dl(&p->dl);
 
 	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
-		add_rq_bw(p->dl.dl_bw, &rq->dl);
-		add_running_bw(p->dl.dl_bw, &rq->dl);
+		add_rq_bw(&p->dl, &rq->dl);
+		add_running_bw(&p->dl, &rq->dl);
 	}
 
 	/*
@@ -1464,8 +1500,8 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	__dequeue_task_dl(rq, p, flags);
 
 	if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
-		sub_running_bw(p->dl.dl_bw, &rq->dl);
-		sub_rq_bw(p->dl.dl_bw, &rq->dl);
+		sub_running_bw(&p->dl, &rq->dl);
+		sub_rq_bw(&p->dl, &rq->dl);
 	}
 
 	/*
@@ -1571,7 +1607,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
 	 */
 	raw_spin_lock(&rq->lock);
 	if (p->dl.dl_non_contending) {
-		sub_running_bw(p->dl.dl_bw, &rq->dl);
+		sub_running_bw(&p->dl, &rq->dl);
 		p->dl.dl_non_contending = 0;
 		/*
 		 * If the timer handler is currently running and the
@@ -1583,7 +1619,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
 		if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
 			put_task_struct(p);
 	}
-	sub_rq_bw(p->dl.dl_bw, &rq->dl);
+	sub_rq_bw(&p->dl, &rq->dl);
 	raw_spin_unlock(&rq->lock);
 }
 
@@ -2026,11 +2062,11 @@ retry:
 	}
 
 	deactivate_task(rq, next_task, 0);
-	sub_running_bw(next_task->dl.dl_bw, &rq->dl);
-	sub_rq_bw(next_task->dl.dl_bw, &rq->dl);
+	sub_running_bw(&next_task->dl, &rq->dl);
+	sub_rq_bw(&next_task->dl, &rq->dl);
 	set_task_cpu(next_task, later_rq->cpu);
-	add_rq_bw(next_task->dl.dl_bw, &later_rq->dl);
-	add_running_bw(next_task->dl.dl_bw, &later_rq->dl);
+	add_rq_bw(&next_task->dl, &later_rq->dl);
+	add_running_bw(&next_task->dl, &later_rq->dl);
 	activate_task(later_rq, next_task, 0);
 	ret = 1;
 
@@ -2118,11 +2154,11 @@ static void pull_dl_task(struct rq *this_rq)
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
-			sub_running_bw(p->dl.dl_bw, &src_rq->dl);
-			sub_rq_bw(p->dl.dl_bw, &src_rq->dl);
+			sub_running_bw(&p->dl, &src_rq->dl);
+			sub_rq_bw(&p->dl, &src_rq->dl);
 			set_task_cpu(p, this_cpu);
-			add_rq_bw(p->dl.dl_bw, &this_rq->dl);
-			add_running_bw(p->dl.dl_bw, &this_rq->dl);
+			add_rq_bw(&p->dl, &this_rq->dl);
+			add_running_bw(&p->dl, &this_rq->dl);
 			activate_task(this_rq, p, 0);
 			dmin = p->dl.deadline;
 
@@ -2231,7 +2267,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
 		task_non_contending(p);
 
 	if (!task_on_rq_queued(p))
-		sub_rq_bw(p->dl.dl_bw, &rq->dl);
+		sub_rq_bw(&p->dl, &rq->dl);
 
 	/*
 	 * We cannot use inactive_task_timer() to invoke sub_running_bw()
@@ -2263,7 +2299,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 
 	/* If p is not queued we will update its parameters at next wakeup. */
 	if (!task_on_rq_queued(p)) {
-		add_rq_bw(p->dl.dl_bw, &rq->dl);
+		add_rq_bw(&p->dl, &rq->dl);
 
 		return;
 	}
@@ -2442,6 +2478,9 @@ int sched_dl_overflow(struct task_struct *p, int policy,
 	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
 	int cpus, err = -1;
 
+	if (attr->sched_flags & SCHED_FLAG_SUGOV)
+		return 0;
+
 	/* !deadline task may carry old deadline bandwidth */
 	if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
 		return 0;
@@ -2528,6 +2567,10 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
  */
 bool __checkparam_dl(const struct sched_attr *attr)
 {
+	/* special dl tasks don't actually use any parameter */
+	if (attr->sched_flags & SCHED_FLAG_SUGOV)
+		return true;
+
 	/* deadline != 0 */
 	if (attr->sched_deadline == 0)
 		return false;
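The deadline.c changes follow a common kernel layering pattern: the renamed double-underscore helpers keep doing the raw bandwidth arithmetic, while new same-named wrappers take the scheduling entity and silently skip special (SUGOV) entities, so the sugov kthread never contributes to running_bw or this_bw. A compact userspace model of that layering; the types, values and stripped-down helpers are illustrative stand-ins:

#include <stdbool.h>
#include <stdio.h>

#define SCHED_FLAG_SUGOV 0x10000000u

struct dl_entity { unsigned long long dl_bw; unsigned int flags; };
struct dl_rq { unsigned long long running_bw; };

static bool dl_entity_is_special(const struct dl_entity *dl_se)
{
	return dl_se->flags & SCHED_FLAG_SUGOV;
}

/* Raw accounting, always applied (kernel: __add_running_bw()). */
static void __add_running_bw(unsigned long long dl_bw, struct dl_rq *dl_rq)
{
	dl_rq->running_bw += dl_bw;
}

/* Filtering wrapper: special entities carry no bandwidth (kernel: add_running_bw()). */
static void add_running_bw(const struct dl_entity *dl_se, struct dl_rq *dl_rq)
{
	if (!dl_entity_is_special(dl_se))
		__add_running_bw(dl_se->dl_bw, dl_rq);
}

int main(void)
{
	struct dl_rq rq = { 0 };
	struct dl_entity task  = { .dl_bw = 104857 };	/* ~10% in Q20 */
	struct dl_entity sugov = { .dl_bw = 104857, .flags = SCHED_FLAG_SUGOV };

	add_running_bw(&task, &rq);	/* counted */
	add_running_bw(&sugov, &rq);	/* skipped */
	printf("running_bw = %llu\n", rq.running_bw);	/* 104857 */
	return 0;
}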
kernel/sched/sched.h  +29 −1
@@ -156,13 +156,37 @@ static inline int task_has_dl_policy(struct task_struct *p)
 	return dl_policy(p->policy);
 }
 
+/*
+ * !! For sched_setattr_nocheck() (kernel) only !!
+ *
+ * This is actually gross. :(
+ *
+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
+ * tasks, but still be able to sleep. We need this on platforms that cannot
+ * atomically change clock frequency. Remove once fast switching will be
+ * available on such platforms.
+ *
+ * SUGOV stands for SchedUtil GOVernor.
+ */
+#define SCHED_FLAG_SUGOV	0x10000000
+
+static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
+{
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+	return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
+#else
+	return false;
+#endif
+}
+
 /*
  * Tells if entity @a should preempt entity @b.
  */
 static inline bool
 dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
 {
-	return dl_time_before(a->deadline, b->deadline);
+	return dl_entity_is_special(a) ||
+	       dl_time_before(a->deadline, b->deadline);
 }
 
 /*
@@ -2085,6 +2109,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #define arch_scale_freq_invariant()	(false)
 #endif
 
+#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
+
 static inline unsigned long cpu_util_dl(struct rq *rq)
 {
 	return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
@@ -2094,3 +2120,5 @@ static inline unsigned long cpu_util_cfs(struct rq *rq)
 {
 	return rq->cfs.avg.util_avg;
 }
+
+#endif
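The sched.h hunks are what actually place the kthread "just under the STOP class": dl_entity_preempt() now short-circuits for special entities, so the sugov worker preempts every ordinary DEADLINE task regardless of its (fake) deadline, while cpu_util_dl() keeps reporting only accounted bandwidth. A small model of the new preemption rule; types are simplified and the kernel's wrap-safe dl_time_before() is modeled as a plain '<':

#include <stdbool.h>
#include <stdio.h>

#define SCHED_FLAG_SUGOV 0x10000000u

struct dl_entity { unsigned long long deadline; unsigned int flags; };

static bool dl_entity_is_special(const struct dl_entity *dl_se)
{
	return dl_se->flags & SCHED_FLAG_SUGOV;
}

/* Should @a preempt @b? Special entities always win; otherwise EDF order. */
static bool dl_entity_preempt(const struct dl_entity *a,
			      const struct dl_entity *b)
{
	return dl_entity_is_special(a) || a->deadline < b->deadline;
}

int main(void)
{
	struct dl_entity early = { .deadline = 100 };
	struct dl_entity sugov = { .deadline = 999, .flags = SCHED_FLAG_SUGOV };

	/* Despite its later (fake) deadline, the sugov entity preempts. */
	printf("%d\n", dl_entity_preempt(&sugov, &early));	/* 1 */
	printf("%d\n", dl_entity_preempt(&early, &sugov));	/* 0 */
	return 0;
}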