Commit ef78e5b7 authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Misc fixes all over the place:

   - Fix NUMA over-balancing between lightly loaded nodes. This is
     fallout of the big load-balancer rewrite.

   - Fix the NOHZ remote loadavg update logic, curing anomalies such
     as a reported loadavg of 150 on mostly idle CPUs.

   - Fix an XFS performance/scalability regression

   - Fix a bug that let a task moved into a throttled group keep
     running unthrottled

   - Fix PSI procfs boundary condition

   - Fix the cpu.uclamp.{min,max} cgroup configuration write checks

   - Fix kernel-doc annotations

   - Fix RCU annotations

   - Fix an overly CPU-intensive loop in the housekeeping-CPU selection
     logic on large CPU counts"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Fix kernel-doc warning in attach_entity_load_avg()
  sched/core: Annotate curr pointer in rq with __rcu
  sched/psi: Fix OOB write when writing 0 bytes to PSI files
  sched/fair: Allow a per-CPU kthread waking a task to stack on the same CPU, to fix XFS performance regression
  sched/fair: Prevent unlimited runtime on throttled group
  sched/nohz: Optimize get_nohz_timer_target()
  sched/uclamp: Reject negative values in cpu_uclamp_write()
  sched/fair: Allow a small load imbalance between low utilisation SD_NUMA domains
  timers/nohz: Update NOHZ load in remote tick
  sched/core: Don't skip remote tick for idle CPUs
Parents: da99f935 e9f5490c
+2 −0
@@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { }
 
 #ifdef CONFIG_NO_HZ_COMMON
 void calc_load_nohz_start(void);
+void calc_load_nohz_remote(struct rq *rq);
 void calc_load_nohz_stop(void);
 #else
 static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_remote(struct rq *rq) { }
 static inline void calc_load_nohz_stop(void) { }
 #endif /* CONFIG_NO_HZ_COMMON */

+34 −29
@@ -552,27 +552,32 @@ void resched_cpu(int cpu)
  */
 int get_nohz_timer_target(void)
 {
-	int i, cpu = smp_processor_id();
+	int i, cpu = smp_processor_id(), default_cpu = -1;
 	struct sched_domain *sd;
 
-	if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
-		return cpu;
+	if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
+		if (!idle_cpu(cpu))
+			return cpu;
+		default_cpu = cpu;
+	}
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
-		for_each_cpu(i, sched_domain_span(sd)) {
+		for_each_cpu_and(i, sched_domain_span(sd),
+			housekeeping_cpumask(HK_FLAG_TIMER)) {
 			if (cpu == i)
 				continue;
 
-			if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
+			if (!idle_cpu(i)) {
 				cpu = i;
 				goto unlock;
 			}
 		}
 	}
 
-	if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
-		cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
+	if (default_cpu == -1)
+		default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
+	cpu = default_cpu;
 unlock:
 	rcu_read_unlock();
 	return cpu;
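
As a rough userspace model of the new selection logic (the cpumask
contents, NR_CPUS value, and helper names below are invented for
illustration; this is not kernel code), the point of for_each_cpu_and()
is that only housekeeping CPUs are visited at all, instead of testing
housekeeping_cpu() on every CPU in the domain span:

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS 8

static bool cpu_idle[NR_CPUS]  = { true, true, false, true, true, true, true, true };
static bool housekeep[NR_CPUS] = { true, false, true, false, true, false, true, false };

static int pick_timer_target(int this_cpu)
{
	int i, default_cpu = -1;

	if (housekeep[this_cpu]) {
		if (!cpu_idle[this_cpu])
			return this_cpu;	/* busy housekeeping CPU: keep the timer local */
		default_cpu = this_cpu;
	}

	/* Visit only housekeeping CPUs, mirroring for_each_cpu_and() above. */
	for (i = 0; i < NR_CPUS; i++) {
		if (i == this_cpu || !housekeep[i])
			continue;
		if (!cpu_idle[i])
			return i;		/* first busy housekeeping CPU wins */
	}

	if (default_cpu == -1)
		default_cpu = 0;		/* stand-in for housekeeping_any_cpu() */
	return default_cpu;
}

int main(void)
{
	printf("timer target for CPU 0: %d\n", pick_timer_target(0));	/* prints 2 */
	return 0;
}
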
@@ -1442,17 +1447,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 
 #ifdef CONFIG_SMP
 
-static inline bool is_per_cpu_kthread(struct task_struct *p)
-{
-	if (!(p->flags & PF_KTHREAD))
-		return false;
-
-	if (p->nr_cpus_allowed != 1)
-		return false;
-
-	return true;
-}
-
 /*
  * Per-CPU kthreads are allowed to run on !active && online CPUs, see
  * __set_cpus_allowed_ptr() and select_fallback_rq().
@@ -3669,28 +3663,32 @@ static void sched_tick_remote(struct work_struct *work)
 	 * statistics and checks timeslices in a time-independent way, regardless
 	 * of when exactly it is running.
 	 */
-	if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
+	if (!tick_nohz_tick_stopped_cpu(cpu))
 		goto out_requeue;
 
 	rq_lock_irq(rq, &rf);
-	curr = rq->curr;
-	if (is_idle_task(curr) || cpu_is_offline(cpu))
+	if (cpu_is_offline(cpu))
 		goto out_unlock;
 
+	curr = rq->curr;
 	update_rq_clock(rq);
-	delta = rq_clock_task(rq) - curr->se.exec_start;
 
-	/*
-	 * Make sure the next tick runs within a reasonable
-	 * amount of time.
-	 */
-	WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+	if (!is_idle_task(curr)) {
+		/*
+		 * Make sure the next tick runs within a reasonable
+		 * amount of time.
+		 */
+		delta = rq_clock_task(rq) - curr->se.exec_start;
+		WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+	}
 	curr->sched_class->task_tick(rq, curr, 0);
 
+	calc_load_nohz_remote(rq);
 out_unlock:
 	rq_unlock_irq(rq, &rf);
 
 out_requeue:
+
 	/*
 	 * Run the remote tick once per second (1Hz). This arbitrary
 	 * frequency is large enough to avoid overload but short enough
@@ -7063,8 +7061,15 @@ void sched_move_task(struct task_struct *tsk)
 
 	if (queued)
 		enqueue_task(rq, tsk, queue_flags);
-	if (running)
+	if (running) {
 		set_next_task(rq, tsk);
+		/*
+		 * After changing group, the running task may have joined a
+		 * throttled one but it's still the running task. Trigger a
+		 * resched to make sure that task can still run.
+		 */
+		resched_curr(rq);
+	}
 
 	task_rq_unlock(rq, tsk, &rf);
 }
@@ -7260,7 +7265,7 @@ capacity_from_percent(char *buf)
 					     &req.percent);
 		if (req.ret)
 			return req;
-		if (req.percent > UCLAMP_PERCENT_SCALE) {
+		if ((u64)req.percent > UCLAMP_PERCENT_SCALE) {
 			req.ret = -ERANGE;
 			return req;
 		}
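
The percentage is parsed into a signed 64-bit field, so before this
change a negative input such as "-1" slipped past the range check
(-1 > 10000 is false in signed arithmetic). Casting to u64 turns any
negative value into a huge unsigned one, which the same comparison now
rejects with -ERANGE. A standalone demonstration of the comparison
(UCLAMP_PERCENT_SCALE is 10000 upstream, i.e. 100.00% in fixed point):

#include <stdio.h>
#include <stdint.h>

#define UCLAMP_PERCENT_SCALE 10000

int main(void)
{
	int64_t percent = -1;	/* what a write of "-1" parses to */

	/* Signed comparison: -1 > 10000 is false, the bad value passes. */
	printf("signed:   %d\n", percent > UCLAMP_PERCENT_SCALE);		/* 0 */

	/* Unsigned comparison: (u64)-1 is 0xffff...ffff, so it is rejected. */
	printf("unsigned: %d\n", (uint64_t)percent > UCLAMP_PERCENT_SCALE);	/* 1 */
	return 0;
}
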
+43 −13
@@ -3516,7 +3516,6 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  * attach_entity_load_avg - attach this entity to its cfs_rq load avg
  * @cfs_rq: cfs_rq to attach to
  * @se: sched_entity to attach
- * @flags: migration hints
  *
  * Must call update_cfs_rq_load_avg() before this, since we rely on
  * cfs_rq->avg.last_update_time being current.
@@ -5912,6 +5911,20 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	    (available_idle_cpu(prev) || sched_idle_cpu(prev)))
 		return prev;
 
+	/*
+	 * Allow a per-cpu kthread to stack with the wakee if the
+	 * kworker thread and the tasks previous CPUs are the same.
+	 * The assumption is that the wakee queued work for the
+	 * per-cpu kthread that is now complete and the wakeup is
+	 * essentially a sync wakeup. An obvious example of this
+	 * pattern is IO completions.
+	 */
+	if (is_per_cpu_kthread(current) &&
+	    prev == smp_processor_id() &&
+	    this_rq()->nr_running <= 1) {
+		return prev;
+	}
+
 	/* Check a recently used CPU as a potential idle candidate: */
 	recent_used_cpu = p->recent_used_cpu;
 	if (recent_used_cpu != prev &&
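
A minimal userspace sketch of the new stacking rule (the struct and
values are invented for illustration; upstream the predicate is the
is_per_cpu_kthread() helper removed from core.c earlier in this diff so
that it can be shared with this code):

#include <stdio.h>
#include <stdbool.h>

struct task {
	bool is_kthread;	/* PF_KTHREAD in the kernel */
	int nr_cpus_allowed;	/* 1 means bound to a single CPU */
};

static bool is_per_cpu_kthread(const struct task *t)
{
	return t->is_kthread && t->nr_cpus_allowed == 1;
}

/*
 * Stack the wakee on its previous CPU only for the sync-wakeup pattern
 * described in the comment above: a per-CPU kthread (e.g. a kworker
 * completing IO) wakes a task that last ran right here, and the local
 * runqueue is nearly empty.
 */
static bool stack_on_prev(const struct task *waker, int prev_cpu,
			  int this_cpu, int this_nr_running)
{
	return is_per_cpu_kthread(waker) &&
	       prev_cpu == this_cpu &&
	       this_nr_running <= 1;
}

int main(void)
{
	struct task kworker = { .is_kthread = true, .nr_cpus_allowed = 1 };

	printf("%d\n", stack_on_prev(&kworker, 2, 2, 1));	/* 1: stays on prev */
	printf("%d\n", stack_on_prev(&kworker, 3, 2, 1));	/* 0: different CPU, search as before */
	return 0;
}
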
@@ -8658,10 +8671,6 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	/*
 	 * Try to use spare capacity of local group without overloading it or
 	 * emptying busiest.
-	 * XXX Spreading tasks across NUMA nodes is not always the best policy
-	 * and special care should be taken for SD_NUMA domain level before
-	 * spreading the tasks. For now, load_balance() fully relies on
-	 * NUMA_BALANCING and fbq_classify_group/rq to override the decision.
 	 */
 	if (local->group_type == group_has_spare) {
 		if (busiest->group_type > group_fully_busy) {
@@ -8701,8 +8710,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 			env->migration_type = migrate_task;
 			lsub_positive(&nr_diff, local->sum_nr_running);
 			env->imbalance = nr_diff >> 1;
-			return;
-		}
+		} else {
 
 			/*
 			 * If there is no overload, we just want to even the number of
@@ -8711,6 +8719,28 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 			env->migration_type = migrate_task;
 			env->imbalance = max_t(long, 0, (local->idle_cpus -
 						 busiest->idle_cpus) >> 1);
+		}
+
+		/* Consider allowing a small imbalance between NUMA groups */
+		if (env->sd->flags & SD_NUMA) {
+			unsigned int imbalance_min;
+
+			/*
+			 * Compute an allowed imbalance based on a simple
+			 * pair of communicating tasks that should remain
+			 * local and ignore them.
+			 *
+			 * NOTE: Generally this would have been based on
+			 * the domain size and this was evaluated. However,
+			 * the benefit is similar across a range of workloads
+			 * and machines but scaling by the domain size adds
+			 * the risk that lower domains have to be rebalanced.
+			 */
+			imbalance_min = 2;
+			if (busiest->sum_nr_running <= imbalance_min)
+				env->imbalance = 0;
+		}
+
 		return;
 	}
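
The effect of the new SD_NUMA branch, as a standalone sketch (invented
values; not kernel code): a "busiest" NUMA node running no more than a
pair of tasks is treated as balanced, so communicating tasks are left
local instead of being spread across nodes:

#include <stdio.h>

static long numa_imbalance(long imbalance, unsigned int busiest_nr_running,
			   int is_numa_domain)
{
	const unsigned int imbalance_min = 2;	/* the communicating pair from the comment */

	if (is_numa_domain && busiest_nr_running <= imbalance_min)
		return 0;	/* tolerate the small imbalance, keep the tasks local */
	return imbalance;
}

int main(void)
{
	printf("%ld\n", numa_imbalance(1, 2, 1));	/* 0: the pair stays put */
	printf("%ld\n", numa_imbalance(3, 6, 1));	/* 3: larger imbalances still balanced */
	return 0;
}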

+23 −10
@@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void)
 	return calc_load_idx & 1;
 }
 
-void calc_load_nohz_start(void)
+static void calc_load_nohz_fold(struct rq *rq)
 {
-	struct rq *this_rq = this_rq();
 	long delta;
 
-	/*
-	 * We're going into NO_HZ mode, if there's any pending delta, fold it
-	 * into the pending NO_HZ delta.
-	 */
-	delta = calc_load_fold_active(this_rq, 0);
+	delta = calc_load_fold_active(rq, 0);
 	if (delta) {
 		int idx = calc_load_write_idx();
 
@@ -248,6 +243,24 @@ void calc_load_nohz_start(void)
 	}
 }
 
+void calc_load_nohz_start(void)
+{
+	/*
+	 * We're going into NO_HZ mode, if there's any pending delta, fold it
+	 * into the pending NO_HZ delta.
+	 */
+	calc_load_nohz_fold(this_rq());
+}
+
+/*
+ * Keep track of the load for NOHZ_FULL, must be called between
+ * calc_load_nohz_{start,stop}().
+ */
+void calc_load_nohz_remote(struct rq *rq)
+{
+	calc_load_nohz_fold(rq);
+}
+
 void calc_load_nohz_stop(void)
 {
 	struct rq *this_rq = this_rq();
@@ -268,7 +281,7 @@ void calc_load_nohz_stop(void)
 		this_rq->calc_load_update += LOAD_FREQ;
 }
 
-static long calc_load_nohz_fold(void)
+static long calc_load_nohz_read(void)
 {
 	int idx = calc_load_read_idx();
 	long delta = 0;
@@ -323,7 +336,7 @@ static void calc_global_nohz(void)
 }
 #else /* !CONFIG_NO_HZ_COMMON */
 
-static inline long calc_load_nohz_fold(void) { return 0; }
+static inline long calc_load_nohz_read(void) { return 0; }
 static inline void calc_global_nohz(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
@@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks)
 	/*
 	 * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
 	 */
-	delta = calc_load_nohz_fold();
+	delta = calc_load_nohz_read();
 	if (delta)
 		atomic_long_add(delta, &calc_load_tasks);
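
The rename separates the writer side (calc_load_nohz_fold(), which can
now also fold a remote CPU's delta) from the reader side
(calc_load_nohz_read()). A simplified single-bucket userspace model of
that split (illustrative only; upstream uses two banked buckets indexed
by calc_load_write_idx()/calc_load_read_idx() to avoid sampling races):

#include <stdio.h>

static long calc_load_nohz_bucket;

static void calc_load_nohz_fold(long rq_delta)
{
	if (rq_delta)
		calc_load_nohz_bucket += rq_delta;	/* atomic_long_add() upstream */
}

static long calc_load_nohz_read(void)
{
	long delta = calc_load_nohz_bucket;	/* atomic_long_xchg(..., 0) upstream */

	calc_load_nohz_bucket = 0;
	return delta;
}

int main(void)
{
	calc_load_nohz_fold(2);	/* a CPU going NO_HZ-idle folds its 2 runnable tasks */
	calc_load_nohz_fold(1);	/* the remote tick folds for a NOHZ_FULL CPU */
	printf("%ld\n", calc_load_nohz_read());	/* 3: picked up by calc_global_load() */
	printf("%ld\n", calc_load_nohz_read());	/* 0: bucket was drained */
	return 0;
}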

+3 −0
@@ -1199,6 +1199,9 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
 	if (static_branch_likely(&psi_disabled))
 		return -EOPNOTSUPP;
 
+	if (!nbytes)
+		return -EINVAL;
+
 	buf_size = min(nbytes, sizeof(buf));
 	if (copy_from_user(buf, user_buf, buf_size))
 		return -EFAULT;
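
Right after this hunk, psi_write() NUL-terminates the copied string
with buf[buf_size - 1] = '\0'. With a zero-length write, buf_size was 0
and that index underflowed, writing one byte before the buffer; hence
the new -EINVAL guard. A standalone model of the failure mode
(illustrative; MIN stands in for the kernel's min()):

#include <stdio.h>
#include <string.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static int psi_write_model(const char *user_buf, size_t nbytes)
{
	char buf[32];
	size_t buf_size;

	if (!nbytes)
		return -1;	/* the new guard: reject 0-byte writes */

	buf_size = MIN(nbytes, sizeof(buf));
	memcpy(buf, user_buf, buf_size);	/* copy_from_user() upstream */
	buf[buf_size - 1] = '\0';		/* would underflow if buf_size were 0 */
	return 0;
}

int main(void)
{
	printf("%d\n", psi_write_model("some 500000 1000000", 19));	/* 0: accepted */
	printf("%d\n", psi_write_model("", 0));				/* -1: rejected */
	return 0;
}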