Commit 5a1c9558 authored by Frederic Weisbecker's avatar Frederic Weisbecker Committed by Ingo Molnar
Browse files

sched/cputime: Support other fields on kcpustat_field()



Provide support for user, nice, guest and guest_nice fields through
kcpustat_field().

Whether we account the delta to a nice or non-nice field is decided based
on the nice value snapshot taken at the time kcpustat_field() is called.
If the nice value of the task has been changed since the last vtime
update, we may have an inaccurate distribution of the nice vs. non-nice
cputime.

However this is considered as a minor issue compared to the proper fix
that would involve interrupting the target on nice updates, which is
undesired on nohz_full CPUs.

Signed-off-by: default avatarFrederic Weisbecker <frederic@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Link: https://lkml.kernel.org/r/20191121024430.19938-2-frederic@kernel.org


Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent bef69dd8
Loading
Loading
Loading
Loading
+43 −11
Original line number Diff line number Diff line
@@ -912,11 +912,21 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
	} while (read_seqcount_retry(&vtime->seqcount, seq));
}

/*
 * Return the userspace-side cputime accumulated by @vtime, including the
 * not-yet-flushed delta since the last vtime snapshot. Covers both plain
 * user time (VTIME_USER) and guest time (VTIME_GUEST); any other vtime
 * state contributes nothing.
 */
static u64 kcpustat_user_vtime(struct vtime *vtime)
{
	switch (vtime->state) {
	case VTIME_USER:
		return vtime->utime + vtime_delta(vtime);
	case VTIME_GUEST:
		return vtime->gtime + vtime_delta(vtime);
	default:
		return 0;
	}
}

static int kcpustat_field_vtime(u64 *cpustat,
				struct vtime *vtime,
				struct task_struct *tsk,
				enum cpu_usage_stat usage,
				int cpu, u64 *val)
{
	struct vtime *vtime = &tsk->vtime;
	unsigned int seq;
	int err;

@@ -946,9 +956,37 @@ static int kcpustat_field_vtime(u64 *cpustat,

		*val = cpustat[usage];

		/*
		 * Nice VS unnice cputime accounting may be inaccurate if
		 * the nice value has changed since the last vtime update.
		 * But proper fix would involve interrupting target on nice
		 * updates which is a no go on nohz_full (although the scheduler
		 * may still interrupt the target if rescheduling is needed...)
		 */
		switch (usage) {
		case CPUTIME_SYSTEM:
			if (vtime->state == VTIME_SYS)
				*val += vtime->stime + vtime_delta(vtime);

			break;
		case CPUTIME_USER:
			if (task_nice(tsk) <= 0)
				*val += kcpustat_user_vtime(vtime);
			break;
		case CPUTIME_NICE:
			if (task_nice(tsk) > 0)
				*val += kcpustat_user_vtime(vtime);
			break;
		case CPUTIME_GUEST:
			if (vtime->state == VTIME_GUEST && task_nice(tsk) <= 0)
				*val += vtime->gtime + vtime_delta(vtime);
			break;
		case CPUTIME_GUEST_NICE:
			if (vtime->state == VTIME_GUEST && task_nice(tsk) > 0)
				*val += vtime->gtime + vtime_delta(vtime);
			break;
		default:
			break;
		}
	} while (read_seqcount_retry(&vtime->seqcount, seq));

	return 0;
@@ -965,15 +1003,10 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
	if (!vtime_accounting_enabled_cpu(cpu))
		return cpustat[usage];

	/* Only support sys vtime for now */
	if (usage != CPUTIME_SYSTEM)
		return cpustat[usage];

	rq = cpu_rq(cpu);

	for (;;) {
		struct task_struct *curr;
		struct vtime *vtime;

		rcu_read_lock();
		curr = rcu_dereference(rq->curr);
@@ -982,8 +1015,7 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
			return cpustat[usage];
		}

		vtime = &curr->vtime;
		err = kcpustat_field_vtime(cpustat, vtime, usage, cpu, &val);
		err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val);
		rcu_read_unlock();

		if (!err)