I'm sorry for the patchbomb, especially as I usually complain about
these myself, but I don't see any way to split this patchset into
standalone pieces; none of them would make sense on their own. All I
could do was isolate about 3 cleanup patches.
Currently, cputime_t serves one purpose: it lets s390 and powerpc
(with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y) avoid converting their arch
clock counters to nanosecs or jiffies while accounting cputime. But this
comes at the cost of a lot of complexity and uglification in the core
code: an opaque type that relies on lots of mutators and accessors to
handle a time unit of arbitrary granularity, which in turn requires lots
of workarounds and likely brings some performance penalties.
So this patchset converts most of the cputime_t uses to nsecs. In the
end the type is only used by s390 and powerpc. This comes at the expense
of those two archs, which then need to perform a cputime_to_nsecs()
conversion every time they feed cputime updates to the core. I expect we
can mitigate this performance loss by flushing cputime to the core only
on ticks, accumulating time as cputime_t in between so that the
conversions become rarer.
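As a rough sketch of what that could look like (hypothetical code, not
part of this series), an arch would accumulate deltas in cputime_t and
pay for a single conversion when flushing on the tick:

	/* Hypothetical per-arch accumulator, flushed once per tick */
	static DEFINE_PER_CPU(cputime_t, pending_utime);

	void arch_vtime_account_user(cputime_t delta)
	{
		__this_cpu_add(pending_utime, delta);
	}

	void arch_vtime_flush(struct task_struct *tsk)
	{
		cputime_t pending = __this_cpu_xchg(pending_utime, 0);

		/* One cputime_to_nsecs() per tick instead of per update */
		account_user_time(tsk, cputime_to_nsecs(pending));
	}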
git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
cputime/nsecs-v2
HEAD: cf109941db5da247569ee99a6effe00f62537082
Thanks,
Frederic
---
Frederic Weisbecker (36):
jiffies: Reuse TICK_NSEC instead of NSEC_PER_JIFFY
time: Introduce jiffies64_to_nsecs()
sched: Remove unused INIT_CPUTIME macro
cputime: Convert kcpustat to nsecs
macintosh/rack-meter: Remove cputime_t internal use
cputime: Convert guest time accounting to nsecs
cputime: Special API to return old-typed cputime
cputime: Convert task/group cputime to nsecs
alpha: Convert obsolete cputime_t to nsecs
x86: Convert obsolete cputime type to nsecs
isdn: Convert obsolete cputime type to nsecs
binfmt: Convert obsolete cputime type to nsecs
acct: Convert obsolete cputime type to nsecs
delaycct: Convert obsolete cputime type to nsecs
tsacct: Convert obsolete cputime type to nsecs
signal: Convert obsolete cputime type to nsecs
cputime: Increment kcpustat directly on irqtime account
posix-timers: Use TICK_NSEC instead of an ad-hoc dynamically calculated version
posix-timers: Convert internals to use nsecs
itimer: Convert internal cputime_t units to nsec
sched: Remove temporary cputime_t accessors
cputime: Push time to account_user_time() in nsecs
cputime: Push time to account_steal_time() in nsecs
cputime: Push time to account_idle_time() in nsecs
cputime: Push time to account_system_time() in nsecs
cputime: Complete nsec conversion of tick based accounting
vtime: Return nsecs instead of cputime_t to account
cputime: Remove jiffies based cputime
ia64: Move nsecs based cputime headers to the last arch using it
ia64: Convert vtime to use nsec units directly
ia64: Remove unused cputime definitions
s390: Make arch_cpu_idle_time() return nsecs
powerpc: Remove unused cputime definitions
s390: Remove unused cputime definitions
cputime: Remove unused nsec_to_cputime
cputime: Remove asm generic headers
arch/alpha/include/asm/Kbuild | 1 -
arch/alpha/kernel/osf_sys.c | 10 +-
arch/arc/include/asm/Kbuild | 1 -
arch/arm/include/asm/Kbuild | 1 -
arch/arm64/include/asm/Kbuild | 1 -
arch/avr32/include/asm/Kbuild | 1 -
arch/blackfin/include/asm/Kbuild | 1 -
arch/c6x/include/asm/Kbuild | 1 -
arch/cris/include/asm/Kbuild | 1 -
arch/frv/include/asm/Kbuild | 1 -
arch/h8300/include/asm/Kbuild | 1 -
arch/hexagon/include/asm/Kbuild | 1 -
arch/ia64/include/asm/cputime.h | 6 +-
arch/ia64/kernel/head.S | 4 +-
arch/ia64/kernel/time.c | 14 +--
arch/m32r/include/asm/Kbuild | 1 -
arch/m68k/include/asm/Kbuild | 1 -
arch/metag/include/asm/Kbuild | 1 -
arch/microblaze/include/asm/Kbuild | 1 -
arch/mips/include/asm/Kbuild | 1 -
arch/mips/kernel/binfmt_elfn32.c | 11 ---
arch/mips/kernel/binfmt_elfo32.c | 11 ---
arch/mn10300/include/asm/Kbuild | 1 -
arch/nios2/include/asm/Kbuild | 1 -
arch/openrisc/include/asm/Kbuild | 1 -
arch/parisc/include/asm/Kbuild | 1 -
arch/parisc/kernel/binfmt_elf32.c | 10 --
arch/powerpc/include/asm/cputime.h | 177 +---------------------------------
arch/powerpc/kernel/time.c | 33 ++-----
arch/s390/appldata/appldata_os.c | 16 +--
arch/s390/include/asm/cputime.h | 109 +--------------------
arch/s390/kernel/idle.c | 7 +-
arch/s390/kernel/vtime.c | 14 +--
arch/score/include/asm/Kbuild | 1 -
arch/sh/include/asm/Kbuild | 1 -
arch/sparc/include/asm/Kbuild | 1 -
arch/tile/include/asm/Kbuild | 1 -
arch/um/include/asm/Kbuild | 1 -
arch/unicore32/include/asm/Kbuild | 1 -
arch/x86/include/asm/Kbuild | 1 -
arch/x86/kernel/apm_32.c | 6 +-
arch/x86/kvm/hyperv.c | 5 +-
arch/xtensa/include/asm/Kbuild | 1 -
drivers/cpufreq/cpufreq.c | 6 +-
drivers/cpufreq/cpufreq_governor.c | 2 +-
drivers/cpufreq/cpufreq_stats.c | 1 -
drivers/isdn/mISDN/stack.c | 4 +-
drivers/macintosh/rack-meter.c | 28 +++---
fs/binfmt_elf.c | 15 +--
fs/binfmt_elf_fdpic.c | 14 +--
fs/compat_binfmt_elf.c | 20 ++--
fs/proc/array.c | 16 +--
fs/proc/stat.c | 64 ++++++------
fs/proc/uptime.c | 7 +-
include/asm-generic/cputime.h | 15 ---
include/asm-generic/cputime_jiffies.h | 75 --------------
include/asm-generic/cputime_nsecs.h | 121 -----------------------
include/linux/cputime.h | 7 +-
include/linux/jiffies.h | 2 +
include/linux/kernel_stat.h | 9 +-
include/linux/posix-timers.h | 14 +--
include/linux/sched.h | 62 +++++-------
include/trace/events/timer.h | 26 ++---
kernel/acct.c | 7 +-
kernel/delayacct.c | 6 +-
kernel/exit.c | 4 +-
kernel/fork.c | 2 +-
kernel/sched/cpuacct.c | 2 +-
kernel/sched/cputime.c | 165 +++++++++++++------------------
kernel/sched/sched.h | 7 +-
kernel/sched/stats.h | 4 +-
kernel/signal.c | 12 +--
kernel/sys.c | 16 +--
kernel/time/itimer.c | 60 ++++--------
kernel/time/jiffies.c | 32 +++---
kernel/time/posix-cpu-timers.c | 170 ++++++++++++--------------------
kernel/time/time.c | 10 ++
kernel/time/timeconst.bc | 6 ++
kernel/tsacct.c | 21 ++--
79 files changed, 408 insertions(+), 1085 deletions(-)
This will be needed for the cputime_t to nsec conversion.
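For example, with HZ=100 the NSEC_PER_SEC % HZ test is zero and the
conversion reduces to a plain multiplication by 10000000. With HZ=300
the remainder is non-zero, so the scaled constants generated by
timeconst.bc are used instead. Worked out by hand as a sanity check:

	/*
	 * HZ=300: cd = gcd(300, 1000000000) = 100
	 * HZ_TO_NSEC_NUM = 1000000000 / 100 = 10000000
	 * HZ_TO_NSEC_DEN = 300 / 100 = 3
	 * jiffies64_to_nsecs(j) = div_u64(j * 10000000, 3)
	 */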
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/jiffies.h | 2 ++
kernel/time/time.c | 10 ++++++++++
kernel/time/timeconst.bc | 6 ++++++
3 files changed, 18 insertions(+)
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 589d14e..624215c 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -293,6 +293,8 @@ static inline u64 jiffies_to_nsecs(const unsigned long j)
return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC;
}
+extern u64 jiffies64_to_nsecs(u64 j);
+
extern unsigned long __msecs_to_jiffies(const unsigned int m);
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
/*
diff --git a/kernel/time/time.c b/kernel/time/time.c
index bd62fb8..0653d28 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -702,6 +702,16 @@ u64 nsec_to_clock_t(u64 x)
#endif
}
+u64 jiffies64_to_nsecs(u64 j)
+{
+#if !(NSEC_PER_SEC % HZ)
+ return (NSEC_PER_SEC / HZ) * j;
+# else
+ return div_u64(j * HZ_TO_NSEC_NUM, HZ_TO_NSEC_DEN);
+#endif
+}
+EXPORT_SYMBOL(jiffies64_to_nsecs);
+
/**
* nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
*
diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
index c486889..f83bbb8 100644
--- a/kernel/time/timeconst.bc
+++ b/kernel/time/timeconst.bc
@@ -98,6 +98,12 @@ define timeconst(hz) {
print "#define HZ_TO_USEC_DEN\t\t", hz/cd, "\n"
print "#define USEC_TO_HZ_NUM\t\t", hz/cd, "\n"
print "#define USEC_TO_HZ_DEN\t\t", 1000000/cd, "\n"
+
+ cd=gcd(hz,1000000000)
+ print "#define HZ_TO_NSEC_NUM\t\t", 1000000000/cd, "\n"
+ print "#define HZ_TO_NSEC_DEN\t\t", hz/cd, "\n"
+ print "#define NSEC_TO_HZ_NUM\t\t", hz/cd, "\n"
+ print "#define NSEC_TO_HZ_DEN\t\t", 1000000000/cd, "\n"
print "\n"
print "#endif /* KERNEL_TIMECONST_H */\n"
--
2.7.4
This API returns a task's cputime in cputime_t, in order to ease the
conversion of the cputime internals to nsecs. Blindly converting all
cputime readers to this API now will later let us move all these places,
smoothly and step by step, to the new nsec-based cputime.
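For illustration, a call site that still wants cputime_t during the
transition just goes through the new wrapper, which for now merely
forwards to task_cputime():

	cputime_t utime, stime;

	task_cputime_t(p, &utime, &stime);

Once a given site is ready for nsecs, it switches back to
task_cputime() with u64 arguments.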
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/alpha/kernel/osf_sys.c | 2 +-
arch/x86/kernel/apm_32.c | 2 +-
drivers/isdn/mISDN/stack.c | 2 +-
fs/binfmt_elf.c | 6 +++---
fs/binfmt_elf_fdpic.c | 6 +++---
include/linux/sched.h | 32 ++++++++++++++++++++++++++---
kernel/acct.c | 2 +-
kernel/delayacct.c | 4 ++--
kernel/signal.c | 4 ++--
kernel/time/itimer.c | 2 +-
kernel/time/posix-cpu-timers.c | 46 +++++++++++++++++++++---------------------
kernel/tsacct.c | 6 +++---
12 files changed, 70 insertions(+), 44 deletions(-)
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index ffb93f49..c26d631 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1146,7 +1146,7 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
memset(&r, 0, sizeof(r));
switch (who) {
case RUSAGE_SELF:
- task_cputime(current, &utime, &stime);
+ task_cputime_t(current, &utime, &stime);
utime_jiffies = cputime_to_jiffies(utime);
stime_jiffies = cputime_to_jiffies(stime);
jiffies_to_timeval32(utime_jiffies, &r.ru_utime);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d90749b..b578839 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -913,7 +913,7 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
unsigned int bucket;
recalc:
- task_cputime(current, &utime, &stime);
+ task_cputime_t(current, &utime, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
} else if (jiffies_since_last_check > idle_period) {
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index 9cb4b62..0a36617 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -306,7 +306,7 @@ mISDNStackd(void *data)
"msg %d sleep %d stopped\n",
dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
st->stopped_cnt);
- task_cputime(st->thread, &utime, &stime);
+ task_cputime_t(st->thread, &utime, &stime);
printk(KERN_DEBUG
"mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
dev_name(&st->dev->dev), utime, stime);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 2472af2..0f62ac5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1421,19 +1421,19 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
prstatus->pr_pgrp = task_pgrp_vnr(p);
prstatus->pr_sid = task_session_vnr(p);
if (thread_group_leader(p)) {
- struct task_cputime cputime;
+ struct task_cputime_t cputime;
/*
* This is the record for the group leader. It shows the
* group-wide total, not its individual thread total.
*/
- thread_group_cputime(p, &cputime);
+ thread_group_cputime_t(p, &cputime);
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
cputime_t utime, stime;
- task_cputime(p, &utime, &stime);
+ task_cputime_t(p, &utime, &stime);
cputime_to_timeval(utime, &prstatus->pr_utime);
cputime_to_timeval(stime, &prstatus->pr_stime);
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 464a972..29e175d 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1342,19 +1342,19 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
prstatus->pr_pgrp = task_pgrp_vnr(p);
prstatus->pr_sid = task_session_vnr(p);
if (thread_group_leader(p)) {
- struct task_cputime cputime;
+ struct task_cputime_t cputime;
/*
* This is the record for the group leader. It shows the
* group-wide total, not its individual thread total.
*/
- thread_group_cputime(p, &cputime);
+ thread_group_cputime_t(p, &cputime);
cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
} else {
cputime_t utime, stime;
- task_cputime(p, &utime, &stime);
+ task_cputime_t(p, &utime, &stime);
cputime_to_timeval(utime, &prstatus->pr_utime);
cputime_to_timeval(stime, &prstatus->pr_stime);
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5854e70..6cf884d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -613,6 +613,13 @@ struct task_cputime {
unsigned long long sum_exec_runtime;
};
+/* Temporary type to ease cputime_t to nsecs conversion */
+struct task_cputime_t {
+ cputime_t utime;
+ cputime_t stime;
+ unsigned long long sum_exec_runtime;
+};
+
/* Alternate field names when used to cache expirations. */
#define virt_exp utime
#define prof_exp stime
@@ -746,7 +753,7 @@ struct signal_struct {
struct thread_group_cputimer cputimer;
/* Earliest-expiration cache. */
- struct task_cputime cputime_expires;
+ struct task_cputime_t cputime_expires;
#ifdef CONFIG_NO_HZ_FULL
atomic_t tick_dep_mask;
@@ -1668,7 +1675,7 @@ struct task_struct {
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
- struct task_cputime cputime_expires;
+ struct task_cputime_t cputime_expires;
struct list_head cpu_timers[3];
/* process credentials */
@@ -2268,6 +2275,19 @@ static inline void task_cputime_scaled(struct task_struct *t,
}
#endif
+static inline void task_cputime_t(struct task_struct *t,
+ cputime_t *utime, cputime_t *stime)
+{
+ task_cputime(t, utime, stime);
+}
+
+static inline void task_cputime_t_scaled(struct task_struct *t,
+ cputime_t *utimescaled,
+ cputime_t *stimescaled)
+{
+ task_cputime_scaled(t, utimescaled, stimescaled);
+}
+
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
@@ -3470,7 +3490,13 @@ static __always_inline bool need_resched(void)
* Thread group CPU time accounting.
*/
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times);
+
+static inline void thread_group_cputime_t(struct task_struct *tsk,
+ struct task_cputime_t *times)
+{
+ thread_group_cputime(tsk, (struct task_cputime *)times);
+}
/*
* Reevaluate whether the task has signals pending delivery.
diff --git a/kernel/acct.c b/kernel/acct.c
index 74963d1..b9b190a8 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -559,7 +559,7 @@ void acct_collect(long exitcode, int group_dead)
pacct->ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
pacct->ac_flag |= AXSIG;
- task_cputime(current, &utime, &stime);
+ task_cputime_t(current, &utime, &stime);
pacct->ac_utime += utime;
pacct->ac_stime += stime;
pacct->ac_minflt += current->min_flt;
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 435c14a..228640f 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -87,12 +87,12 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
unsigned long flags, t1;
s64 tmp;
- task_cputime(tsk, &utime, &stime);
+ task_cputime_t(tsk, &utime, &stime);
tmp = (s64)d->cpu_run_real_total;
tmp += cputime_to_nsecs(utime + stime);
d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
- task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+ task_cputime_t_scaled(tsk, &utimescaled, &stimescaled);
tmp = (s64)d->cpu_scaled_run_real_total;
tmp += cputime_to_nsecs(utimescaled + stimescaled);
d->cpu_scaled_run_real_total =
diff --git a/kernel/signal.c b/kernel/signal.c
index 75761ac..db189b4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1613,7 +1613,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
task_uid(tsk));
rcu_read_unlock();
- task_cputime(tsk, &utime, &stime);
+ task_cputime_t(tsk, &utime, &stime);
info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
@@ -1698,7 +1698,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
rcu_read_unlock();
- task_cputime(tsk, &utime, &stime);
+ task_cputime_t(tsk, &utime, &stime);
info.si_utime = cputime_to_clock_t(utime);
info.si_stime = cputime_to_clock_t(stime);
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 1d5c720..bc4ba4c 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -53,7 +53,7 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
cval = it->expires;
cinterval = it->incr;
if (cval) {
- struct task_cputime cputime;
+ struct task_cputime_t cputime;
cputime_t t;
thread_group_cputimer(tsk, &cputime);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index e887ffc..286c590 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -116,7 +116,7 @@ static void bump_cpu_timer(struct k_itimer *timer,
* Checks @cputime to see if all fields are zero. Returns true if all fields
* are zero, false if any field is nonzero.
*/
-static inline int task_cputime_zero(const struct task_cputime *cputime)
+static inline int task_cputime_zero(const struct task_cputime_t *cputime)
{
if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
return 1;
@@ -127,7 +127,7 @@ static inline unsigned long long prof_ticks(struct task_struct *p)
{
cputime_t utime, stime;
- task_cputime(p, &utime, &stime);
+ task_cputime_t(p, &utime, &stime);
return cputime_to_expires(utime + stime);
}
@@ -135,7 +135,7 @@ static inline unsigned long long virt_ticks(struct task_struct *p)
{
cputime_t utime, stime;
- task_cputime(p, &utime, &stime);
+ task_cputime_t(p, &utime, &stime);
return cputime_to_expires(utime);
}
@@ -211,7 +211,7 @@ static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
}
}
-static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum)
+static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime_t *sum)
{
__update_gt_cputime(&cputime_atomic->utime, sum->utime);
__update_gt_cputime(&cputime_atomic->stime, sum->stime);
@@ -219,7 +219,7 @@ static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct
}
/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */
-static inline void sample_cputime_atomic(struct task_cputime *times,
+static inline void sample_cputime_atomic(struct task_cputime_t *times,
struct task_cputime_atomic *atomic_times)
{
times->utime = atomic64_read(&atomic_times->utime);
@@ -227,10 +227,10 @@ static inline void sample_cputime_atomic(struct task_cputime *times,
times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
}
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
- struct task_cputime sum;
+ struct task_cputime_t sum;
/* Check if cputimer isn't running. This is accessed without locking. */
if (!READ_ONCE(cputimer->running)) {
@@ -239,7 +239,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
* values through the TIMER_ABSTIME flag, therefore we have
* to synchronize the timer to the clock every time we start it.
*/
- thread_group_cputime(tsk, &sum);
+ thread_group_cputime_t(tsk, &sum);
update_gt_cputime(&cputimer->cputime_atomic, &sum);
/*
@@ -263,21 +263,21 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
struct task_struct *p,
unsigned long long *sample)
{
- struct task_cputime cputime;
+ struct task_cputime_t cputime;
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
- thread_group_cputime(p, &cputime);
+ thread_group_cputime_t(p, &cputime);
*sample = cputime_to_expires(cputime.utime + cputime.stime);
break;
case CPUCLOCK_VIRT:
- thread_group_cputime(p, &cputime);
+ thread_group_cputime_t(p, &cputime);
*sample = cputime_to_expires(cputime.utime);
break;
case CPUCLOCK_SCHED:
- thread_group_cputime(p, &cputime);
+ thread_group_cputime_t(p, &cputime);
*sample = cputime.sum_exec_runtime;
break;
}
@@ -470,7 +470,7 @@ static void arm_timer(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
struct list_head *head, *listpos;
- struct task_cputime *cputime_expires;
+ struct task_cputime_t *cputime_expires;
struct cpu_timer_list *const nt = &timer->it.cpu;
struct cpu_timer_list *next;
@@ -566,7 +566,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
struct task_struct *p,
unsigned long long *sample)
{
- struct task_cputime cputime;
+ struct task_cputime_t cputime;
thread_group_cputimer(p, &cputime);
switch (CPUCLOCK_WHICH(which_clock)) {
@@ -765,7 +765,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
/*
* Protect against sighand release/switch in exit/exec and
* also make timer sampling safe if it ends up calling
- * thread_group_cputime().
+ * thread_group_cputime_t().
*/
sighand = lock_task_sighand(p, &flags);
if (unlikely(sighand == NULL)) {
@@ -830,7 +830,7 @@ static void check_thread_timers(struct task_struct *tsk,
{
struct list_head *timers = tsk->cpu_timers;
struct signal_struct *const sig = tsk->signal;
- struct task_cputime *tsk_expires = &tsk->cputime_expires;
+ struct task_cputime_t *tsk_expires = &tsk->cputime_expires;
unsigned long long expires;
unsigned long soft;
@@ -938,7 +938,7 @@ static void check_process_timers(struct task_struct *tsk,
unsigned long long utime, ptime, virt_expires, prof_expires;
unsigned long long sum_sched_runtime, sched_expires;
struct list_head *timers = sig->cpu_timers;
- struct task_cputime cputime;
+ struct task_cputime_t cputime;
unsigned long soft;
/*
@@ -1041,7 +1041,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
} else {
/*
* Protect arm_timer() and timer sampling in case of call to
- * thread_group_cputime().
+ * thread_group_cputime_t().
*/
sighand = lock_task_sighand(p, &flags);
if (unlikely(sighand == NULL)) {
@@ -1084,8 +1084,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
* Returns true if any field of the former is greater than the corresponding
* field of the latter if the latter field is set. Otherwise returns false.
*/
-static inline int task_cputime_expired(const struct task_cputime *sample,
- const struct task_cputime *expires)
+static inline int task_cputime_expired(const struct task_cputime_t *sample,
+ const struct task_cputime_t *expires)
{
if (expires->utime && sample->utime >= expires->utime)
return 1;
@@ -1112,9 +1112,9 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
struct signal_struct *sig;
if (!task_cputime_zero(&tsk->cputime_expires)) {
- struct task_cputime task_sample;
+ struct task_cputime_t task_sample;
- task_cputime(tsk, &task_sample.utime, &task_sample.stime);
+ task_cputime_t(tsk, &task_sample.utime, &task_sample.stime);
task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
return 1;
@@ -1137,7 +1137,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
*/
if (READ_ONCE(sig->cputimer.running) &&
!READ_ONCE(sig->cputimer.checking_timer)) {
- struct task_cputime group_sample;
+ struct task_cputime_t group_sample;
sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index f8e26ab..040d0a6 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -66,11 +66,11 @@ void bacct_add_tsk(struct user_namespace *user_ns,
task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
rcu_read_unlock();
- task_cputime(tsk, &utime, &stime);
+ task_cputime_t(tsk, &utime, &stime);
stats->ac_utime = cputime_to_usecs(utime);
stats->ac_stime = cputime_to_usecs(stime);
- task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+ task_cputime_t_scaled(tsk, &utimescaled, &stimescaled);
stats->ac_utimescaled = cputime_to_usecs(utimescaled);
stats->ac_stimescaled = cputime_to_usecs(stimescaled);
@@ -159,7 +159,7 @@ void acct_update_integrals(struct task_struct *tsk)
unsigned long flags;
local_irq_save(flags);
- task_cputime(tsk, &utime, &stime);
+ task_cputime_t(tsk, &utime, &stime);
__acct_update_integrals(tsk, utime, stime);
local_irq_restore(flags);
}
--
2.7.4
I'm not sure whether the mISDN stats are ABI, but they display task
cputime as a raw cputime_t value regardless of which type cputime_t
wraps, which could be jiffies, nsecs, usecs, or whatever random time
unit. Plus they wrongly assume that cputime_t is long: with a nsec-based
cputime_t, for example, the value is u64 and the "%ld" format is wrong
on 32-bit builds.
Given that this dump is broken anyway, let's just display the nanosec
value and stick with that.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
drivers/isdn/mISDN/stack.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c
index 0a36617..b324474 100644
--- a/drivers/isdn/mISDN/stack.c
+++ b/drivers/isdn/mISDN/stack.c
@@ -203,7 +203,7 @@ mISDNStackd(void *data)
{
struct mISDNstack *st = data;
#ifdef MISDN_MSG_STATS
- cputime_t utime, stime;
+ u64 utime, stime;
#endif
int err = 0;
@@ -306,9 +306,9 @@ mISDNStackd(void *data)
"msg %d sleep %d stopped\n",
dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt,
st->stopped_cnt);
- task_cputime_t(st->thread, &utime, &stime);
+ task_cputime(st->thread, &utime, &stime);
printk(KERN_DEBUG
- "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n",
+ "mISDNStackd daemon for %s utime(%llu) stime(%llu)\n",
dev_name(&st->dev->dev), utime, stime);
printk(KERN_DEBUG
"mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n",
--
2.7.4
The irqtime is accounted in nsecs and stored in
cpu_irq_time.hardirq_time and cpu_irq_time.softirq_time. Only once the
accumulated amount reaches a new jiffy does it get accounted to
kcpustat.
This was necessary when kcpustat was stored in cputime_t, which could be
as coarse as a jiffy. But now kcpustat is stored in nsecs, so this whole
discretization game with temporary irqtime storage has become
unnecessary.
We can now account the irqtime directly to kcpustat.
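In other words, on irq exit the delta can now go straight to kcpustat,
with only a small remainder tracked on the side so that the tick can
still subtract the time that was already accounted. This is what the
patch below boils down to:

	cpustat[CPUTIME_IRQ] += delta;		/* nsecs, accounted directly */
	irqtime->tick_delta += delta;		/* consumed by the tick path */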
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/sched/cputime.c | 50 +++++++++++++++++---------------------------------
kernel/sched/sched.h | 7 ++++---
2 files changed, 21 insertions(+), 36 deletions(-)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 26cd477..2e28ae2 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -44,6 +44,7 @@ void disable_sched_clock_irqtime(void)
void irqtime_account_irq(struct task_struct *curr)
{
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+ u64 *cpustat = kcpustat_this_cpu->cpustat;
s64 delta;
int cpu;
@@ -61,49 +62,35 @@ void irqtime_account_irq(struct task_struct *curr)
* in that case, so as not to confuse scheduler with a special task
* that do not consume any time, but still wants to run.
*/
- if (hardirq_count())
- irqtime->hardirq_time += delta;
- else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
- irqtime->softirq_time += delta;
+ if (hardirq_count()) {
+ cpustat[CPUTIME_IRQ] += delta;
+ irqtime->tick_delta += delta;
+ } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
+ cpustat[CPUTIME_SOFTIRQ] += delta;
+ irqtime->tick_delta += delta;
+ }
u64_stats_update_end(&irqtime->sync);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
-static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
+static cputime_t irqtime_tick_accounted(cputime_t maxtime)
{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
- cputime_t irq_cputime;
+ struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+ cputime_t delta;
- irq_cputime = nsecs_to_cputime64(irqtime - cpustat[idx]);
- irq_cputime = min(irq_cputime, maxtime);
- cpustat[idx] += cputime_to_nsecs(irq_cputime);
+ delta = nsecs_to_cputime(irqtime->tick_delta);
+ delta = min(delta, maxtime);
+ irqtime->tick_delta -= cputime_to_nsecs(delta);
- return irq_cputime;
-}
-
-static cputime_t irqtime_account_hi_update(cputime_t maxtime)
-{
- return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
- CPUTIME_IRQ, maxtime);
-}
-
-static cputime_t irqtime_account_si_update(cputime_t maxtime)
-{
- return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
- CPUTIME_SOFTIRQ, maxtime);
+ return delta;
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
#define sched_clock_irqtime (0)
-static cputime_t irqtime_account_hi_update(cputime_t dummy)
-{
- return 0;
-}
-
-static cputime_t irqtime_account_si_update(cputime_t dummy)
+static cputime_t irqtime_tick_accounted(cputime_t dummy)
{
return 0;
}
@@ -280,10 +267,7 @@ static inline cputime_t account_other_time(cputime_t max)
accounted = steal_account_process_time(max);
if (accounted < max)
- accounted += irqtime_account_hi_update(max - accounted);
-
- if (accounted < max)
- accounted += irqtime_account_si_update(max - accounted);
+ accounted += irqtime_tick_accounted(max - accounted);
return accounted;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 055f935..cdc18cf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -4,6 +4,7 @@
#include <linux/sched/rt.h>
#include <linux/u64_stats_sync.h>
#include <linux/sched/deadline.h>
+#include <linux/kernel_stat.h>
#include <linux/binfmts.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
@@ -1741,8 +1742,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
- u64 hardirq_time;
- u64 softirq_time;
+ u64 tick_delta;
u64 irq_start_time;
struct u64_stats_sync sync;
};
@@ -1752,12 +1752,13 @@ DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
static inline u64 irq_time_read(int cpu)
{
struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+ u64 *cpustat = kcpustat_cpu(cpu).cpustat;
unsigned int seq;
u64 total;
do {
seq = __u64_stats_fetch_begin(&irqtime->sync);
- total = irqtime->softirq_time + irqtime->hardirq_time;
+ total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ];
} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
return total;
--
2.7.4
Use the new nsec-based cputime accessors as part of the overall cputime
conversion from cputime_t to nsecs.
Also convert posix-cpu-timers to use nsec-based internal counters, which
simplifies the code.
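With all samples in nanoseconds for every clock type, the per-clock
timespec_to_sample()/sample_to_timespec() helpers collapse into the
generic conversions, e.g.:

	new_expires = timespec_to_ns(&new->it_value);
	itp->it_interval = ns_to_timespec(timer->it.cpu.incr);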
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/posix-timers.h | 12 +--
include/linux/sched.h | 6 +-
kernel/fork.c | 2 +-
kernel/sched/cputime.c | 6 +-
kernel/sched/stats.h | 4 +-
kernel/time/itimer.c | 6 +-
kernel/time/posix-cpu-timers.c | 210 +++++++++++++++++------------------------
7 files changed, 100 insertions(+), 146 deletions(-)
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 62d44c1..890de52 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -8,19 +8,9 @@
#include <linux/alarmtimer.h>
-static inline unsigned long long cputime_to_expires(cputime_t expires)
-{
- return (__force unsigned long long)expires;
-}
-
-static inline cputime_t expires_to_cputime(unsigned long long expires)
-{
- return (__force cputime_t)expires;
-}
-
struct cpu_timer_list {
struct list_head entry;
- unsigned long long expires, incr;
+ u64 expires, incr;
struct task_struct *task;
int firing;
};
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 394e85d..148a0a6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -753,7 +753,7 @@ struct signal_struct {
struct thread_group_cputimer cputimer;
/* Earliest-expiration cache. */
- struct task_cputime_t cputime_expires;
+ struct task_cputime cputime_expires;
#ifdef CONFIG_NO_HZ_FULL
atomic_t tick_dep_mask;
@@ -1675,7 +1675,7 @@ struct task_struct {
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
- struct task_cputime_t cputime_expires;
+ struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
/* process credentials */
@@ -3498,7 +3498,7 @@ static __always_inline bool need_resched(void)
* Thread group CPU time accounting.
*/
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times);
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
static inline void thread_group_cputime_t(struct task_struct *tsk,
struct task_cputime_t *cputime)
diff --git a/kernel/fork.c b/kernel/fork.c
index 600e93b..2d36cd8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1308,7 +1308,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
if (cpu_limit != RLIM_INFINITY) {
- sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
+ sig->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC;
sig->cputimer.running = true;
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 2e28ae2..31c457f 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
/* Add user time to process. */
p->utime += cputime_to_nsecs(cputime);
- account_group_user_time(p, cputime);
+ account_group_user_time(p, cputime_to_nsecs(cputime));
index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
@@ -144,7 +144,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
/* Add guest time to process. */
p->utime += cputime_to_nsecs(cputime);
- account_group_user_time(p, cputime);
+ account_group_user_time(p, cputime_to_nsecs(cputime));
p->gtime += cputime_to_nsecs(cputime);
/* Add guest time to cpustat. */
@@ -168,7 +168,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
{
/* Add system time to process. */
p->stime += cputime_to_nsecs(cputime);
- account_group_system_time(p, cputime);
+ account_group_system_time(p, cputime_to_nsecs(cputime));
/* Add system time to cpustat. */
task_group_account_field(p, index, cputime_to_nsecs(cputime));
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 34659a8..9788478 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -216,7 +216,7 @@ static inline bool cputimer_running(struct task_struct *tsk)
* running CPU and update the utime field there.
*/
static inline void account_group_user_time(struct task_struct *tsk,
- cputime_t cputime)
+ u64 cputime)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
@@ -237,7 +237,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
* running CPU and update the stime field there.
*/
static inline void account_group_system_time(struct task_struct *tsk,
- cputime_t cputime)
+ u64 cputime)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index bc4ba4c..20aa205 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -53,15 +53,15 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
cval = it->expires;
cinterval = it->incr;
if (cval) {
- struct task_cputime_t cputime;
+ struct task_cputime cputime;
cputime_t t;
thread_group_cputimer(tsk, &cputime);
if (clock_id == CPUCLOCK_PROF)
- t = cputime.utime + cputime.stime;
+ t = nsecs_to_cputime(cputime.utime + cputime.stime);
else
/* CPUCLOCK_VIRT */
- t = cputime.utime;
+ t = nsecs_to_cputime(cputime.utime);
if (cval < t)
/* about to fire */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 9e7a5be..a02f012 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -51,39 +51,14 @@ static int check_clock(const clockid_t which_clock)
return error;
}
-static inline unsigned long long
-timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
-{
- unsigned long long ret;
-
- ret = 0; /* high half always zero when .cpu used */
- if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
- } else {
- ret = cputime_to_expires(timespec_to_cputime(tp));
- }
- return ret;
-}
-
-static void sample_to_timespec(const clockid_t which_clock,
- unsigned long long expires,
- struct timespec *tp)
-{
- if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
- *tp = ns_to_timespec(expires);
- else
- cputime_to_timespec((__force cputime_t)expires, tp);
-}
-
/*
* Update expiry time from increment, and increase overrun count,
* given the current clock sample.
*/
-static void bump_cpu_timer(struct k_itimer *timer,
- unsigned long long now)
+static void bump_cpu_timer(struct k_itimer *timer, u64 now)
{
int i;
- unsigned long long delta, incr;
+ u64 delta, incr;
if (timer->it.cpu.incr == 0)
return;
@@ -116,28 +91,28 @@ static void bump_cpu_timer(struct k_itimer *timer,
* Checks @cputime to see if all fields are zero. Returns true if all fields
* are zero, false if any field is nonzero.
*/
-static inline int task_cputime_zero(const struct task_cputime_t *cputime)
+static inline int task_cputime_zero(const struct task_cputime *cputime)
{
if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
return 1;
return 0;
}
-static inline unsigned long long prof_ticks(struct task_struct *p)
+static inline u64 prof_ticks(struct task_struct *p)
{
- cputime_t utime, stime;
+ u64 utime, stime;
- task_cputime_t(p, &utime, &stime);
+ task_cputime(p, &utime, &stime);
- return cputime_to_expires(utime + stime);
+ return utime + stime;
}
-static inline unsigned long long virt_ticks(struct task_struct *p)
+static inline u64 virt_ticks(struct task_struct *p)
{
- cputime_t utime, stime;
+ u64 utime, stime;
- task_cputime_t(p, &utime, &stime);
+ task_cputime(p, &utime, &stime);
- return cputime_to_expires(utime);
+ return utime;
}
static int
@@ -177,8 +152,8 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
/*
* Sample a per-thread clock for the given task.
*/
-static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
- unsigned long long *sample)
+static int cpu_clock_sample(const clockid_t which_clock,
+ struct task_struct *p, u64 *sample)
{
switch (CPUCLOCK_WHICH(which_clock)) {
default:
@@ -211,7 +186,7 @@ static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
}
}
-static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime_t *sum)
+static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum)
{
__update_gt_cputime(&cputime_atomic->utime, sum->utime);
__update_gt_cputime(&cputime_atomic->stime, sum->stime);
@@ -219,7 +194,7 @@ static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct
}
/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */
-static inline void sample_cputime_atomic(struct task_cputime_t *times,
+static inline void sample_cputime_atomic(struct task_cputime *times,
struct task_cputime_atomic *atomic_times)
{
times->utime = atomic64_read(&atomic_times->utime);
@@ -227,10 +202,10 @@ static inline void sample_cputime_atomic(struct task_cputime_t *times,
times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
}
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times)
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
- struct task_cputime_t sum;
+ struct task_cputime sum;
/* Check if cputimer isn't running. This is accessed without locking. */
if (!READ_ONCE(cputimer->running)) {
@@ -239,7 +214,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times
* values through the TIMER_ABSTIME flag, therefore we have
* to synchronize the timer to the clock every time we start it.
*/
- thread_group_cputime_t(tsk, &sum);
+ thread_group_cputime(tsk, &sum);
update_gt_cputime(&cputimer->cputime_atomic, &sum);
/*
@@ -261,23 +236,23 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times
*/
static int cpu_clock_sample_group(const clockid_t which_clock,
struct task_struct *p,
- unsigned long long *sample)
+ u64 *sample)
{
- struct task_cputime_t cputime;
+ struct task_cputime cputime;
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
- thread_group_cputime_t(p, &cputime);
- *sample = cputime_to_expires(cputime.utime + cputime.stime);
+ thread_group_cputime(p, &cputime);
+ *sample = cputime.utime + cputime.stime;
break;
case CPUCLOCK_VIRT:
- thread_group_cputime_t(p, &cputime);
- *sample = cputime_to_expires(cputime.utime);
+ thread_group_cputime(p, &cputime);
+ *sample = cputime.utime;
break;
case CPUCLOCK_SCHED:
- thread_group_cputime_t(p, &cputime);
+ thread_group_cputime(p, &cputime);
*sample = cputime.sum_exec_runtime;
break;
}
@@ -289,7 +264,7 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk,
struct timespec *tp)
{
int err = -EINVAL;
- unsigned long long rtn;
+ u64 rtn;
if (CPUCLOCK_PERTHREAD(which_clock)) {
if (same_thread_group(tsk, current))
@@ -300,7 +275,7 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk,
}
if (!err)
- sample_to_timespec(which_clock, rtn, tp);
+ *tp = ns_to_timespec(rtn);
return err;
}
@@ -457,7 +432,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
cleanup_timers(tsk->signal->cpu_timers);
}
-static inline int expires_gt(cputime_t expires, cputime_t new_exp)
+static inline int expires_gt(u64 expires, u64 new_exp)
{
return expires == 0 || expires > new_exp;
}
@@ -470,7 +445,7 @@ static void arm_timer(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
struct list_head *head, *listpos;
- struct task_cputime_t *cputime_expires;
+ struct task_cputime *cputime_expires;
struct cpu_timer_list *const nt = &timer->it.cpu;
struct cpu_timer_list *next;
@@ -492,7 +467,7 @@ static void arm_timer(struct k_itimer *timer)
list_add(&nt->entry, listpos);
if (listpos == head) {
- unsigned long long exp = nt->expires;
+ u64 exp = nt->expires;
/*
* We are the new earliest-expiring POSIX 1.b timer, hence
@@ -503,16 +478,15 @@ static void arm_timer(struct k_itimer *timer)
switch (CPUCLOCK_WHICH(timer->it_clock)) {
case CPUCLOCK_PROF:
- if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
- cputime_expires->prof_exp = expires_to_cputime(exp);
+ if (expires_gt(cputime_expires->prof_exp, exp))
+ cputime_expires->prof_exp = exp;
break;
case CPUCLOCK_VIRT:
- if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
- cputime_expires->virt_exp = expires_to_cputime(exp);
+ if (expires_gt(cputime_expires->virt_exp, exp))
+ cputime_expires->virt_exp = exp;
break;
case CPUCLOCK_SCHED:
- if (cputime_expires->sched_exp == 0 ||
- cputime_expires->sched_exp > exp)
+ if (expires_gt(cputime_expires->sched_exp, exp))
cputime_expires->sched_exp = exp;
break;
}
@@ -563,20 +537,19 @@ static void cpu_timer_fire(struct k_itimer *timer)
* traversal.
*/
static int cpu_timer_sample_group(const clockid_t which_clock,
- struct task_struct *p,
- unsigned long long *sample)
+ struct task_struct *p, u64 *sample)
{
- struct task_cputime_t cputime;
+ struct task_cputime cputime;
thread_group_cputimer(p, &cputime);
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
- *sample = cputime_to_expires(cputime.utime + cputime.stime);
+ *sample = cputime.utime + cputime.stime;
break;
case CPUCLOCK_VIRT:
- *sample = cputime_to_expires(cputime.utime);
+ *sample = cputime.utime;
break;
case CPUCLOCK_SCHED:
*sample = cputime.sum_exec_runtime;
@@ -597,12 +570,12 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
unsigned long flags;
struct sighand_struct *sighand;
struct task_struct *p = timer->it.cpu.task;
- unsigned long long old_expires, new_expires, old_incr, val;
+ u64 old_expires, new_expires, old_incr, val;
int ret;
WARN_ON_ONCE(p == NULL);
- new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
+ new_expires = timespec_to_ns(&new->it_value);
/*
* Protect against sighand release/switch in exit/exec and p->cpu_timers
@@ -663,9 +636,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
bump_cpu_timer(timer, val);
if (val < timer->it.cpu.expires) {
old_expires = timer->it.cpu.expires - val;
- sample_to_timespec(timer->it_clock,
- old_expires,
- &old->it_value);
+ old->it_value = ns_to_timespec(old_expires);
} else {
old->it_value.tv_nsec = 1;
old->it_value.tv_sec = 0;
@@ -703,8 +674,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
* Install the new reload setting, and
* set up the signal and overrun bookkeeping.
*/
- timer->it.cpu.incr = timespec_to_sample(timer->it_clock,
- &new->it_interval);
+ timer->it.cpu.incr = timespec_to_ns(&new->it_interval);
/*
* This acts as a modification timestamp for the timer,
@@ -727,17 +697,15 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
ret = 0;
out:
- if (old) {
- sample_to_timespec(timer->it_clock,
- old_incr, &old->it_interval);
- }
+ if (old)
+ old->it_interval = ns_to_timespec(old_incr);
return ret;
}
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
{
- unsigned long long now;
+ u64 now;
struct task_struct *p = timer->it.cpu.task;
WARN_ON_ONCE(p == NULL);
@@ -745,8 +713,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
/*
* Easy part: convert the reload time.
*/
- sample_to_timespec(timer->it_clock,
- timer->it.cpu.incr, &itp->it_interval);
+ itp->it_interval = ns_to_timespec(timer->it.cpu.incr);
if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */
itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
@@ -765,7 +732,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
/*
* Protect against sighand release/switch in exit/exec and
* also make timer sampling safe if it ends up calling
- * thread_group_cputime_t().
+ * thread_group_cputime().
*/
sighand = lock_task_sighand(p, &flags);
if (unlikely(sighand == NULL)) {
@@ -775,8 +742,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
* Call the timer disarmed, nothing else to do.
*/
timer->it.cpu.expires = 0;
- sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
- &itp->it_value);
+ itp->it_value = ns_to_timespec(timer->it.cpu.expires);
return;
} else {
cpu_timer_sample_group(timer->it_clock, p, &now);
@@ -785,9 +751,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
}
if (now < timer->it.cpu.expires) {
- sample_to_timespec(timer->it_clock,
- timer->it.cpu.expires - now,
- &itp->it_value);
+ itp->it_value = ns_to_timespec(timer->it.cpu.expires - now);
} else {
/*
* The timer should have expired already, but the firing
@@ -830,8 +794,8 @@ static void check_thread_timers(struct task_struct *tsk,
{
struct list_head *timers = tsk->cpu_timers;
struct signal_struct *const sig = tsk->signal;
- struct task_cputime_t *tsk_expires = &tsk->cputime_expires;
- unsigned long long expires;
+ struct task_cputime *tsk_expires = &tsk->cputime_expires;
+ u64 expires;
unsigned long soft;
/*
@@ -842,10 +806,10 @@ static void check_thread_timers(struct task_struct *tsk,
return;
expires = check_timers_list(timers, firing, prof_ticks(tsk));
- tsk_expires->prof_exp = expires_to_cputime(expires);
+ tsk_expires->prof_exp = expires;
expires = check_timers_list(++timers, firing, virt_ticks(tsk));
- tsk_expires->virt_exp = expires_to_cputime(expires);
+ tsk_expires->virt_exp = expires;
tsk_expires->sched_exp = check_timers_list(++timers, firing,
tsk->se.sum_exec_runtime);
@@ -895,13 +859,12 @@ static inline void stop_process_timers(struct signal_struct *sig)
}
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
- unsigned long long *expires,
- unsigned long long cur_time, int signo)
+ u64 *expires, u64 cur_time, int signo)
{
if (!it->expires)
return;
- if (cur_time >= it->expires) {
+ if (cur_time >= cputime_to_nsecs(it->expires)) {
if (it->incr) {
it->expires += it->incr;
it->error += it->incr_error;
@@ -919,8 +882,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
}
- if (it->expires && (!*expires || it->expires < *expires)) {
- *expires = it->expires;
+ if (it->expires && (!*expires || cputime_to_nsecs(it->expires) < *expires)) {
+ *expires = cputime_to_nsecs(it->expires);
}
}
@@ -933,10 +896,10 @@ static void check_process_timers(struct task_struct *tsk,
struct list_head *firing)
{
struct signal_struct *const sig = tsk->signal;
- unsigned long long utime, ptime, virt_expires, prof_expires;
- unsigned long long sum_sched_runtime, sched_expires;
+ u64 utime, ptime, virt_expires, prof_expires;
+ u64 sum_sched_runtime, sched_expires;
struct list_head *timers = sig->cpu_timers;
- struct task_cputime_t cputime;
+ struct task_cputime cputime;
unsigned long soft;
/*
@@ -956,8 +919,8 @@ static void check_process_timers(struct task_struct *tsk,
* Collect the current process totals.
*/
thread_group_cputimer(tsk, &cputime);
- utime = cputime_to_expires(cputime.utime);
- ptime = utime + cputime_to_expires(cputime.stime);
+ utime = cputime.utime;
+ ptime = utime + cputime.stime;
sum_sched_runtime = cputime.sum_exec_runtime;
prof_expires = check_timers_list(timers, firing, ptime);
@@ -973,10 +936,10 @@ static void check_process_timers(struct task_struct *tsk,
SIGVTALRM);
soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
if (soft != RLIM_INFINITY) {
- unsigned long psecs = cputime_to_secs(ptime);
+ unsigned long psecs = div_u64(ptime, NSEC_PER_SEC);
unsigned long hard =
READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
- cputime_t x;
+ u64 x;
if (psecs >= hard) {
/*
* At the hard limit, we just die.
@@ -995,14 +958,13 @@ static void check_process_timers(struct task_struct *tsk,
sig->rlim[RLIMIT_CPU].rlim_cur = soft;
}
}
- x = secs_to_cputime(soft);
- if (!prof_expires || x < prof_expires) {
+ x = soft * NSEC_PER_SEC;
+ if (!prof_expires || x < prof_expires)
prof_expires = x;
- }
}
- sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
- sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
+ sig->cputime_expires.prof_exp = prof_expires;
+ sig->cputime_expires.virt_exp = virt_expires;
sig->cputime_expires.sched_exp = sched_expires;
if (task_cputime_zero(&sig->cputime_expires))
stop_process_timers(sig);
@@ -1019,7 +981,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
struct sighand_struct *sighand;
unsigned long flags;
struct task_struct *p = timer->it.cpu.task;
- unsigned long long now;
+ u64 now;
WARN_ON_ONCE(p == NULL);
@@ -1039,7 +1001,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
} else {
/*
* Protect arm_timer() and timer sampling in case of call to
- * thread_group_cputime_t().
+ * thread_group_cputime().
*/
sighand = lock_task_sighand(p, &flags);
if (unlikely(sighand == NULL)) {
@@ -1082,8 +1044,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
* Returns true if any field of the former is greater than the corresponding
* field of the latter if the latter field is set. Otherwise returns false.
*/
-static inline int task_cputime_expired(const struct task_cputime_t *sample,
- const struct task_cputime_t *expires)
+static inline int task_cputime_expired(const struct task_cputime *sample,
+ const struct task_cputime *expires)
{
if (expires->utime && sample->utime >= expires->utime)
return 1;
@@ -1110,9 +1072,9 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
struct signal_struct *sig;
if (!task_cputime_zero(&tsk->cputime_expires)) {
- struct task_cputime_t task_sample;
+ struct task_cputime task_sample;
- task_cputime_t(tsk, &task_sample.utime, &task_sample.stime);
+ task_cputime(tsk, &task_sample.utime, &task_sample.stime);
task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
return 1;
@@ -1135,7 +1097,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
*/
if (READ_ONCE(sig->cputimer.running) &&
!READ_ONCE(sig->cputimer.checking_timer)) {
- struct task_cputime_t group_sample;
+ struct task_cputime group_sample;
sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
@@ -1218,7 +1180,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval)
{
- unsigned long long now;
+ u64 now, new;
WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now);
@@ -1230,31 +1192,33 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
* it to be absolute.
*/
if (*oldval) {
- if (*oldval <= now) {
+ if (cputime_to_nsecs(*oldval) <= now) {
/* Just about to fire. */
*oldval = cputime_one_jiffy;
} else {
- *oldval -= now;
+ *oldval -= nsecs_to_cputime(now);
}
}
if (!*newval)
return;
- *newval += now;
+ *newval += nsecs_to_cputime(now);
}
+ new = cputime_to_nsecs(*newval);
+
/*
* Update expiration cache if we are the earliest timer, or eventually
* RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
*/
switch (clock_idx) {
case CPUCLOCK_PROF:
- if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
- tsk->signal->cputime_expires.prof_exp = *newval;
+ if (expires_gt(tsk->signal->cputime_expires.prof_exp, new))
+ tsk->signal->cputime_expires.prof_exp = new;
break;
case CPUCLOCK_VIRT:
- if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
- tsk->signal->cputime_expires.virt_exp = *newval;
+ if (expires_gt(tsk->signal->cputime_expires.virt_exp, new))
+ tsk->signal->cputime_expires.virt_exp = new;
break;
}
@@ -1312,7 +1276,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
/*
* We were interrupted by a signal.
*/
- sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
+ *rqtp = ns_to_timespec(timer.it.cpu.expires);
error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
if (!error) {
/*
--
2.7.4
This is one more step toward converting cputime accounting to pure nsecs.
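Concretely, account_user_time() now takes a nanosecond value and the
archs with native vtime accounting convert at the call boundary, as in
the s390 and powerpc hunks below:

	account_user_time(tsk, cputime_to_nsecs(user));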
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/ia64/kernel/time.c | 5 +++--
arch/powerpc/kernel/time.c | 2 +-
arch/s390/kernel/vtime.c | 2 +-
include/linux/kernel_stat.h | 2 +-
kernel/sched/cputime.c | 42 ++++++++++++++++++++++++------------------
5 files changed, 30 insertions(+), 23 deletions(-)
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 021f44a..ce6bf09 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -63,11 +63,12 @@ extern cputime_t cycle_to_cputime(u64 cyc);
void vtime_account_user(struct task_struct *tsk)
{
- cputime_t delta_utime;
+ u64 delta_utime;
struct thread_info *ti = task_thread_info(tsk);
if (ti->ac_utime) {
- delta_utime = cycle_to_cputime(ti->ac_utime);
+ /* TODO: cycle_to_ns */
+ delta_utime = cputime_to_nsecs(cycle_to_cputime(ti->ac_utime));
account_user_time(tsk, delta_utime);
ti->ac_utime = 0;
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 19361fb..9b372b1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -392,7 +392,7 @@ void vtime_account_user(struct task_struct *tsk)
acct->user_time = 0;
acct->user_time_scaled = 0;
acct->utime_sspurr = 0;
- account_user_time(tsk, utime);
+ account_user_time(tsk, cputime_to_nsecs(utime));
tsk->utimescaled += cputime_to_nsecs(utimescaled);
}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 4905e8c..0ac0d4c 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -137,7 +137,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
user_scaled = (user_scaled * mult) / div;
system_scaled = (system_scaled * mult) / div;
}
- account_user_time(tsk, user);
+ account_user_time(tsk, cputime_to_nsecs(user));
tsk->utimescaled += cputime_to_nsecs(user_scaled);
account_system_time(tsk, hardirq_offset, system);
tsk->stimescaled += cputime_to_nsecs(system_scaled);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 00f7768..6831734 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,7 +78,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
return kstat_cpu(cpu).irqs_sum;
}
-extern void account_user_time(struct task_struct *, cputime_t);
+extern void account_user_time(struct task_struct *, u64);
extern void account_system_time(struct task_struct *, int, cputime_t);
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 31c457f..f5318d9 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -116,18 +116,18 @@ static inline void task_group_account_field(struct task_struct *p, int index,
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
*/
-void account_user_time(struct task_struct *p, cputime_t cputime)
+void account_user_time(struct task_struct *p, u64 cputime)
{
int index;
/* Add user time to process. */
- p->utime += cputime_to_nsecs(cputime);
- account_group_user_time(p, cputime_to_nsecs(cputime));
+ p->utime += cputime;
+ account_group_user_time(p, cputime);
index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
/* Add user time to cpustat. */
- task_group_account_field(p, index, cputime_to_nsecs(cputime));
+ task_group_account_field(p, index, cputime);
/* Account for user time used */
acct_account_cputime(p);
@@ -363,8 +363,9 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq, int ticks)
{
- u64 cputime = (__force u64) cputime_one_jiffy * ticks;
+ u64 old_cputime = (__force u64) cputime_one_jiffy * ticks;
cputime_t other;
+ u64 cputime;
/*
* When returning from idle, many ticks can get accounted at
@@ -374,9 +375,11 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
* other time can exceed ticks occasionally.
*/
other = account_other_time(ULONG_MAX);
- if (other >= cputime)
+ if (other >= old_cputime)
return;
- cputime -= other;
+
+ old_cputime -= other;
+ cputime = cputime_to_nsecs(old_cputime);
if (this_cpu_ksoftirqd() == p) {
/*
@@ -384,15 +387,16 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
* So, we have to handle it separately here.
* Also, p->stime needs to be updated for ksoftirqd.
*/
- __account_system_time(p, cputime, CPUTIME_SOFTIRQ);
+ __account_system_time(p, old_cputime, CPUTIME_SOFTIRQ);
} else if (user_tick) {
account_user_time(p, cputime);
} else if (p == rq->idle) {
- account_idle_time(cputime);
+ account_idle_time(old_cputime);
} else if (p->flags & PF_VCPU) { /* System time or guest time */
- account_guest_time(p, cputime);
+
+ account_guest_time(p, old_cputime);
} else {
- __account_system_time(p, cputime, CPUTIME_SYSTEM);
+ __account_system_time(p, old_cputime, CPUTIME_SYSTEM);
}
}
@@ -475,7 +479,8 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
- cputime_t cputime, steal;
+ cputime_t old_cputime, steal;
+ u64 cputime;
struct rq *rq = this_rq();
if (vtime_accounting_cpu_enabled())
@@ -486,20 +491,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
return;
}
- cputime = cputime_one_jiffy;
+ old_cputime = cputime_one_jiffy;
steal = steal_account_process_time(ULONG_MAX);
- if (steal >= cputime)
+ if (steal >= old_cputime)
return;
- cputime -= steal;
+ old_cputime -= steal;
+ cputime = cputime_to_nsecs(old_cputime);
if (user_tick)
account_user_time(p, cputime);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
- account_system_time(p, HARDIRQ_OFFSET, cputime);
+ account_system_time(p, HARDIRQ_OFFSET, old_cputime);
else
- account_idle_time(cputime);
+ account_idle_time(old_cputime);
}
/*
@@ -738,7 +744,7 @@ void vtime_account_user(struct task_struct *tsk)
tsk->vtime_snap_whence = VTIME_SYS;
if (vtime_delta(tsk)) {
delta_cpu = get_vtime_delta(tsk);
- account_user_time(tsk, delta_cpu);
+ account_user_time(tsk, cputime_to_nsecs(delta_cpu));
}
write_seqcount_end(&tsk->vtime_seqcount);
}
--
2.7.4
This is one more step toward converting cputime accounting to pure nsecs.
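The same conversion-at-the-boundary rule now applies to system time. As
a minimal sketch, assuming a hypothetical arch_clock_delta() helper
standing in for an architecture's native delta sampling (e.g.
vtime_delta() on ia64):

static void vtime_flush_system_sketch(struct task_struct *tsk,
				      int hardirq_offset)
{
	/* arch_clock_delta() is hypothetical; returns arch cputime_t */
	cputime_t delta = arch_clock_delta(tsk);

	/* account_system_time() now takes nanoseconds */
	account_system_time(tsk, hardirq_offset, cputime_to_nsecs(delta));
}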
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/ia64/kernel/time.c | 2 +-
arch/powerpc/kernel/time.c | 3 ++-
arch/s390/kernel/vtime.c | 4 ++--
include/linux/kernel_stat.h | 3 +--
kernel/sched/cputime.c | 38 ++++++++++++++++++--------------------
5 files changed, 24 insertions(+), 26 deletions(-)
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 0287ffe..0aba18e 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -113,7 +113,7 @@ void vtime_account_system(struct task_struct *tsk)
{
cputime_t delta = vtime_delta(tsk);
- account_system_time(tsk, 0, delta);
+ account_system_time(tsk, 0, cputime_to_nsecs(delta));
}
EXPORT_SYMBOL_GPL(vtime_account_system);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 7cd3b3c..b29822d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -358,8 +358,9 @@ void vtime_account_system(struct task_struct *tsk)
unsigned long delta, sys_scaled, stolen;
delta = vtime_delta(tsk, &sys_scaled, &stolen);
- account_system_time(tsk, 0, delta);
+ account_system_time(tsk, 0, cputime_to_nsecs(delta));
tsk->stimescaled += cputime_to_nsecs(sys_scaled);
+
if (stolen)
account_steal_time(cputime_to_nsecs(stolen));
}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 02c3970..b7e545f 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -139,7 +139,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
}
account_user_time(tsk, cputime_to_nsecs(user));
tsk->utimescaled += cputime_to_nsecs(user_scaled);
- account_system_time(tsk, hardirq_offset, system);
+ account_system_time(tsk, hardirq_offset, cputime_to_nsecs(system));
tsk->stimescaled += cputime_to_nsecs(system_scaled);
steal = S390_lowcore.steal_timer;
@@ -204,7 +204,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
system_scaled = (system_scaled * mult) / div;
}
- account_system_time(tsk, 0, system);
+ account_system_time(tsk, 0, cputime_to_nsecs(system));
tsk->stimescaled += cputime_to_nsecs(system_scaled);
virt_timer_forward(system);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 07b103e..ea6f359 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -9,7 +9,6 @@
#include <linux/sched.h>
#include <linux/vtime.h>
#include <asm/irq.h>
-#include <linux/cputime.h>
/*
* 'kernel_stat.h' contains the definitions needed for doing
@@ -79,7 +78,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
}
extern void account_user_time(struct task_struct *, u64);
-extern void account_system_time(struct task_struct *, int, cputime_t);
+extern void account_system_time(struct task_struct *, int, u64);
extern void account_steal_time(u64);
extern void account_idle_time(u64);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ac9aa7e..8d541e7 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -138,22 +138,22 @@ void account_user_time(struct task_struct *p, u64 cputime)
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in virtual machine since the last update
*/
-static void account_guest_time(struct task_struct *p, cputime_t cputime)
+static void account_guest_time(struct task_struct *p, u64 cputime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
/* Add guest time to process. */
- p->utime += cputime_to_nsecs(cputime);
- account_group_user_time(p, cputime_to_nsecs(cputime));
- p->gtime += cputime_to_nsecs(cputime);
+ p->utime += cputime;
+ account_group_user_time(p, cputime);
+ p->gtime += cputime;
/* Add guest time to cpustat. */
if (task_nice(p) > 0) {
- cpustat[CPUTIME_NICE] += cputime_to_nsecs(cputime);
- cpustat[CPUTIME_GUEST_NICE] += cputime_to_nsecs(cputime);
+ cpustat[CPUTIME_NICE] += cputime;
+ cpustat[CPUTIME_GUEST_NICE] += cputime;
} else {
- cpustat[CPUTIME_USER] += cputime_to_nsecs(cputime);
- cpustat[CPUTIME_GUEST] += cputime_to_nsecs(cputime);
+ cpustat[CPUTIME_USER] += cputime;
+ cpustat[CPUTIME_GUEST] += cputime;
}
}
@@ -164,14 +164,14 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
* @index: pointer to cpustat field that has to be updated
*/
static inline
-void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
+void __account_system_time(struct task_struct *p, u64 cputime, int index)
{
/* Add system time to process. */
- p->stime += cputime_to_nsecs(cputime);
- account_group_system_time(p, cputime_to_nsecs(cputime));
+ p->stime += cputime;
+ account_group_system_time(p, cputime);
/* Add system time to cpustat. */
- task_group_account_field(p, index, cputime_to_nsecs(cputime));
+ task_group_account_field(p, index, cputime);
/* Account for system time used */
acct_account_cputime(p);
@@ -183,8 +183,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
* @hardirq_offset: the offset to subtract from hardirq_count()
* @cputime: the cpu time spent in kernel space since the last update
*/
-void account_system_time(struct task_struct *p, int hardirq_offset,
- cputime_t cputime)
+void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
{
int index;
@@ -388,16 +387,15 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
* So, we have to handle it separately here.
* Also, p->stime needs to be updated for ksoftirqd.
*/
- __account_system_time(p, old_cputime, CPUTIME_SOFTIRQ);
+ __account_system_time(p, cputime, CPUTIME_SOFTIRQ);
} else if (user_tick) {
account_user_time(p, cputime);
} else if (p == rq->idle) {
account_idle_time(cputime);
} else if (p->flags & PF_VCPU) { /* System time or guest time */
-
- account_guest_time(p, old_cputime);
+ account_guest_time(p, cputime);
} else {
- __account_system_time(p, old_cputime, CPUTIME_SYSTEM);
+ __account_system_time(p, cputime, CPUTIME_SYSTEM);
}
}
@@ -504,7 +502,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
if (user_tick)
account_user_time(p, cputime);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
- account_system_time(p, HARDIRQ_OFFSET, old_cputime);
+ account_system_time(p, HARDIRQ_OFFSET, cputime);
else
account_idle_time(cputime);
}
@@ -724,7 +722,7 @@ static void __vtime_account_system(struct task_struct *tsk)
{
cputime_t delta_cpu = get_vtime_delta(tsk);
- account_system_time(tsk, irq_count(), delta_cpu);
+ account_system_time(tsk, irq_count(), cputime_to_nsecs(delta_cpu));
}
void vtime_account_system(struct task_struct *tsk)
--
2.7.4
Initially, the nsec-based cputime_t implementation belonged to ia64. It
was later exported to asm-generic for CONFIG_VIRT_CPU_ACCOUNTING_GEN,
but now ia64 is again its only user. So let's move it back there.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/ia64/include/asm/cputime.h | 108 +++++++++++++++++++++++++++++++-
include/asm-generic/cputime.h | 4 --
include/asm-generic/cputime_nsecs.h | 121 ------------------------------------
3 files changed, 106 insertions(+), 127 deletions(-)
delete mode 100644 include/asm-generic/cputime_nsecs.h
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index e2d3f5b..fee773c 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -20,9 +20,113 @@
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
# include <asm-generic/cputime.h>
+
#else
-# include <asm/processor.h>
-# include <asm-generic/cputime_nsecs.h>
+
+#include <asm/processor.h>
+#include <linux/math64.h>
+
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
+
+#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
+
+#define cputime_one_jiffy jiffies_to_cputime(1)
+
+#define cputime_div(__ct, divisor) div_u64((__force u64)__ct, divisor)
+#define cputime_div_rem(__ct, divisor, remainder) \
+ div_u64_rem((__force u64)__ct, divisor, remainder);
+
+/*
+ * Convert cputime <-> jiffies (HZ)
+ */
+#define cputime_to_jiffies(__ct) \
+ cputime_div(__ct, NSEC_PER_SEC / HZ)
+#define jiffies_to_cputime(__jif) \
+ (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct) \
+ cputime_div(__ct, NSEC_PER_SEC / HZ)
+#define jiffies64_to_cputime64(__jif) \
+ (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
+
+
+/*
+ * Convert cputime <-> nanoseconds
+ */
+#define cputime_to_nsecs(__ct) \
+ (__force u64)(__ct)
+#define nsecs_to_cputime(__nsecs) \
+ (__force cputime_t)(__nsecs)
+#define nsecs_to_cputime64(__nsecs) \
+ (__force cputime64_t)(__nsecs)
+
+
+/*
+ * Convert cputime <-> microseconds
+ */
+#define cputime_to_usecs(__ct) \
+ cputime_div(__ct, NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs) \
+ (__force cputime_t)((__usecs) * NSEC_PER_USEC)
+#define usecs_to_cputime64(__usecs) \
+ (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
+
+/*
+ * Convert cputime <-> seconds
+ */
+#define cputime_to_secs(__ct) \
+ cputime_div(__ct, NSEC_PER_SEC)
+#define secs_to_cputime(__secs) \
+ (__force cputime_t)((__secs) * NSEC_PER_SEC)
+
+/*
+ * Convert cputime <-> timespec (nsec)
+ */
+static inline cputime_t timespec_to_cputime(const struct timespec *val)
+{
+ u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+ return (__force cputime_t) ret;
+}
+static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
+{
+ u32 rem;
+
+ val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem);
+ val->tv_nsec = rem;
+}
+
+/*
+ * Convert cputime <-> timeval (msec)
+ */
+static inline cputime_t timeval_to_cputime(const struct timeval *val)
+{
+ u64 ret = (u64)val->tv_sec * NSEC_PER_SEC +
+ val->tv_usec * NSEC_PER_USEC;
+ return (__force cputime_t) ret;
+}
+static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
+{
+ u32 rem;
+
+ val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem);
+ val->tv_usec = rem / NSEC_PER_USEC;
+}
+
+/*
+ * Convert cputime <-> clock (USER_HZ)
+ */
+#define cputime_to_clock_t(__ct) \
+ cputime_div(__ct, (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x) \
+ (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct) \
+ cputime_to_clock_t((__force cputime_t)__ct)
+
+
extern void arch_vtime_task_switch(struct task_struct *tsk);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 8a624b4..358e547 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -4,8 +4,4 @@
#include <linux/time.h>
#include <linux/jiffies.h>
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-# include <asm-generic/cputime_nsecs.h>
-#endif
-
#endif
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
deleted file mode 100644
index 4e3b18e..0000000
--- a/include/asm-generic/cputime_nsecs.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Definitions for measuring cputime in nsecs resolution.
- *
- * Based on <arch/ia64/include/asm/cputime.h>
- *
- * Copyright (C) 2007 FUJITSU LIMITED
- * Copyright (C) 2007 Hidetoshi Seto <[email protected]>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#ifndef _ASM_GENERIC_CPUTIME_NSECS_H
-#define _ASM_GENERIC_CPUTIME_NSECS_H
-
-#include <linux/math64.h>
-
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
-
-#define cputime_one_jiffy jiffies_to_cputime(1)
-
-#define cputime_div(__ct, divisor) div_u64((__force u64)__ct, divisor)
-#define cputime_div_rem(__ct, divisor, remainder) \
- div_u64_rem((__force u64)__ct, divisor, remainder);
-
-/*
- * Convert cputime <-> jiffies (HZ)
- */
-#define cputime_to_jiffies(__ct) \
- cputime_div(__ct, NSEC_PER_SEC / HZ)
-#define jiffies_to_cputime(__jif) \
- (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct) \
- cputime_div(__ct, NSEC_PER_SEC / HZ)
-#define jiffies64_to_cputime64(__jif) \
- (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
-
-
-/*
- * Convert cputime <-> nanoseconds
- */
-#define cputime_to_nsecs(__ct) \
- (__force u64)(__ct)
-#define nsecs_to_cputime(__nsecs) \
- (__force cputime_t)(__nsecs)
-#define nsecs_to_cputime64(__nsecs) \
- (__force cputime64_t)(__nsecs)
-
-
-/*
- * Convert cputime <-> microseconds
- */
-#define cputime_to_usecs(__ct) \
- cputime_div(__ct, NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs) \
- (__force cputime_t)((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs) \
- (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
-
-/*
- * Convert cputime <-> seconds
- */
-#define cputime_to_secs(__ct) \
- cputime_div(__ct, NSEC_PER_SEC)
-#define secs_to_cputime(__secs) \
- (__force cputime_t)((__secs) * NSEC_PER_SEC)
-
-/*
- * Convert cputime <-> timespec (nsec)
- */
-static inline cputime_t timespec_to_cputime(const struct timespec *val)
-{
- u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
- return (__force cputime_t) ret;
-}
-static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
-{
- u32 rem;
-
- val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem);
- val->tv_nsec = rem;
-}
-
-/*
- * Convert cputime <-> timeval (msec)
- */
-static inline cputime_t timeval_to_cputime(const struct timeval *val)
-{
- u64 ret = (u64)val->tv_sec * NSEC_PER_SEC +
- val->tv_usec * NSEC_PER_USEC;
- return (__force cputime_t) ret;
-}
-static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
-{
- u32 rem;
-
- val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem);
- val->tv_usec = rem / NSEC_PER_USEC;
-}
-
-/*
- * Convert cputime <-> clock (USER_HZ)
- */
-#define cputime_to_clock_t(__ct) \
- cputime_div(__ct, (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x) \
- (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
-
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct) \
- cputime_to_clock_t((__force cputime_t)__ct)
-
-#endif
--
2.7.4
There is no need for this cputime_t midlayer anymore. Let's use nsec
units directly.
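The key piece is cycle_to_nsec(), which replaces cycle_to_cputime() in
the assembly below. A rough C rendering of the fixed-point scaling it
performs, as a sketch only (the 128-bit multiply stands in for the
xmpy/shrp sequence, and nsec_per_cyc is assumed to be the per-cpu
nanoseconds-per-cycle factor scaled by 2^IA64_NSEC_PER_CYC_SHIFT):

static inline u64 cycle_to_nsec_sketch(u64 cyc, u64 nsec_per_cyc)
{
	/* 128-bit product, then drop the fixed-point fraction bits */
	return (u64)(((unsigned __int128)cyc * nsec_per_cyc)
			>> IA64_NSEC_PER_CYC_SHIFT);
}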
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/ia64/kernel/head.S | 4 ++--
arch/ia64/kernel/time.c | 17 ++++++++---------
2 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index c9b5e94..3204fdd 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1031,7 +1031,7 @@ GLOBAL_ENTRY(ia64_native_sched_clock)
END(ia64_native_sched_clock)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-GLOBAL_ENTRY(cycle_to_cputime)
+GLOBAL_ENTRY(cycle_to_nsec)
alloc r16=ar.pfs,1,0,0,0
addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
;;
@@ -1047,7 +1047,7 @@ GLOBAL_ENTRY(cycle_to_cputime)
;;
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
-END(cycle_to_cputime)
+END(cycle_to_nsec)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_IA64_BRL_EMU
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 0aba18e..03eb959 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -59,7 +59,7 @@ static struct clocksource *itc_clocksource;
#include <linux/kernel_stat.h>
-extern cputime_t cycle_to_cputime(u64 cyc);
+extern u64 cycle_to_nsec(u64 cyc);
void vtime_account_user(struct task_struct *tsk)
{
@@ -67,8 +67,7 @@ void vtime_account_user(struct task_struct *tsk)
struct thread_info *ti = task_thread_info(tsk);
if (ti->ac_utime) {
- /* TODO: cycle_to_ns */
- delta_utime = cputime_to_nsecs(cycle_to_cputime(ti->ac_utime));
+ delta_utime = cycle_to_nsec(ti->ac_utime);
account_user_time(tsk, delta_utime);
ti->ac_utime = 0;
}
@@ -92,17 +91,17 @@ void arch_vtime_task_switch(struct task_struct *prev)
* Account time for a transition between system, hard irq or soft irq state.
* Note that this function is called with interrupts enabled.
*/
-static cputime_t vtime_delta(struct task_struct *tsk)
+static u64 vtime_delta(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
- cputime_t delta_stime;
+ u64 delta_stime;
__u64 now;
WARN_ON_ONCE(!irqs_disabled());
now = ia64_get_itc();
- delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+ delta_stime = cycle_to_nsec(ti->ac_stime + (now - ti->ac_stamp));
ti->ac_stime = 0;
ti->ac_stamp = now;
@@ -111,15 +110,15 @@ static cputime_t vtime_delta(struct task_struct *tsk)
void vtime_account_system(struct task_struct *tsk)
{
- cputime_t delta = vtime_delta(tsk);
+ u64 delta = vtime_delta(tsk);
- account_system_time(tsk, 0, cputime_to_nsecs(delta));
+ account_system_time(tsk, 0, delta);
}
EXPORT_SYMBOL_GPL(vtime_account_system);
void vtime_account_idle(struct task_struct *tsk)
{
- account_idle_time(cputime_to_nsecs(vtime_delta(tsk)));
+ account_idle_time(vtime_delta(tsk));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
--
2.7.4
cputime_t is now only used by two archs:
* powerpc (when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y)
* s390
And since the core doesn't use it anymore, we don't need any arch support
from the others. So we can remove their stub implementations.
A final cleanup would be to provide an efficient pure arch
implementation of cputime_to_nsecs() for s390 and powerpc, and finally
remove include/linux/cputime.h.
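For reference, one possible shape of such a native s390 conversion,
where one cputime unit is 1/4096 us (see the ">> 12" microsecond
conversion kept in the s390 header), i.e. 125/512 ns. This is only a
sketch of the arithmetic, not a proposed implementation; the split
multiply avoids 64-bit overflow:

static inline u64 s390_cputime_to_nsecs_sketch(u64 ct)
{
	/* ns = ct * 125 / 512, split so that ct * 125 can't overflow */
	return ((ct >> 9) * 125) + (((ct & 0x1ff) * 125) >> 9);
}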
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/alpha/include/asm/Kbuild | 1 -
arch/arc/include/asm/Kbuild | 1 -
arch/arm/include/asm/Kbuild | 1 -
arch/arm64/include/asm/Kbuild | 1 -
arch/avr32/include/asm/Kbuild | 1 -
arch/blackfin/include/asm/Kbuild | 1 -
arch/c6x/include/asm/Kbuild | 1 -
arch/cris/include/asm/Kbuild | 1 -
arch/frv/include/asm/Kbuild | 1 -
arch/h8300/include/asm/Kbuild | 1 -
arch/hexagon/include/asm/Kbuild | 1 -
arch/ia64/include/asm/cputime.h | 4 +---
arch/m32r/include/asm/Kbuild | 1 -
arch/m68k/include/asm/Kbuild | 1 -
arch/metag/include/asm/Kbuild | 1 -
arch/microblaze/include/asm/Kbuild | 1 -
arch/mips/include/asm/Kbuild | 1 -
arch/mn10300/include/asm/Kbuild | 1 -
arch/nios2/include/asm/Kbuild | 1 -
arch/openrisc/include/asm/Kbuild | 1 -
arch/parisc/include/asm/Kbuild | 1 -
arch/powerpc/include/asm/cputime.h | 4 +---
arch/score/include/asm/Kbuild | 1 -
arch/sh/include/asm/Kbuild | 1 -
arch/sparc/include/asm/Kbuild | 1 -
arch/tile/include/asm/Kbuild | 1 -
arch/um/include/asm/Kbuild | 1 -
arch/unicore32/include/asm/Kbuild | 1 -
arch/x86/include/asm/Kbuild | 1 -
arch/xtensa/include/asm/Kbuild | 1 -
include/asm-generic/cputime.h | 7 -------
include/linux/cputime.h | 2 ++
32 files changed, 4 insertions(+), 41 deletions(-)
delete mode 100644 include/asm-generic/cputime.h
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index bf8475c..baa152b 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += exec.h
generic-y += export.h
generic-y += irq_work.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index c332604..63a0401 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -2,7 +2,6 @@ generic-y += auxvec.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 0745538..36730b7 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -2,7 +2,6 @@
generic-y += bitsperlong.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += early_ioremap.h
generic-y += emergency-restart.h
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 44e1d7f..2c87042 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,6 +1,5 @@
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += delay.h
generic-y += div64.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 241b9b9..3d7ef2c 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -1,6 +1,5 @@
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += delay.h
generic-y += device.h
generic-y += div64.h
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 91d49c0..b15392d 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -2,7 +2,6 @@
generic-y += auxvec.h
generic-y += bitsperlong.h
generic-y += bugs.h
-generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 64465e7..4e9f574 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -5,7 +5,6 @@ generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 1778805..9f19e19 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -4,7 +4,6 @@ generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += clkdev.h
generic-y += cmpxchg.h
-generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
generic-y += errno.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 1fa084c..0f5b0d5 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -1,6 +1,5 @@
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += exec.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 373cb23..5efd0c8 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -5,7 +5,6 @@ generic-y += bugs.h
generic-y += cacheflush.h
generic-y += checksum.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += delay.h
generic-y += device.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index db8ddab..a43a7c9 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -6,7 +6,6 @@ generic-y += barrier.h
generic-y += bug.h
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 44bcffc..3d665c0 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -18,9 +18,7 @@
#ifndef __IA64_CPUTIME_H
#define __IA64_CPUTIME_H
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-# include <asm-generic/cputime.h>
-#else
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
extern void arch_vtime_task_switch(struct task_struct *tsk);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index 860e440..652100b 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1,6 +1,5 @@
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += exec.h
generic-y += irq_work.h
generic-y += kvm_para.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index eb85bd9..815ad83 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += device.h
generic-y += emergency-restart.h
generic-y += errno.h
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index 29acb89d..62c5afc 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -2,7 +2,6 @@ generic-y += auxvec.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += dma.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index b0ae88c..6275eb0 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += barrier.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += device.h
generic-y += exec.h
generic-y += irq_work.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 9740066..7a3f50f 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -1,7 +1,6 @@
# MIPS headers
generic-$(CONFIG_GENERIC_CSUM) += checksum.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += dma-contiguous.h
generic-y += emergency-restart.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 1c8dd0f..97f64c7 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += barrier.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += exec.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index d63330e..35b0e88 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -6,7 +6,6 @@ generic-y += bitsperlong.h
generic-y += bug.h
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 2832f03..ef8d1cc 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += checksum.h
generic-y += clkdev.h
generic-y += cmpxchg-local.h
generic-y += cmpxchg.h
-generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index f9b3a81..ed1faad 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -2,7 +2,6 @@
generic-y += auxvec.h
generic-y += barrier.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 6ec0ba6..99b5418 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -16,9 +16,7 @@
#ifndef __POWERPC_CPUTIME_H
#define __POWERPC_CPUTIME_H
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#include <asm-generic/cputime.h>
-#else
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <linux/types.h>
#include <linux/time.h>
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index a05218f..51970bb 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -4,7 +4,6 @@ header-y +=
generic-y += barrier.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 751c337..cf2a750 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += bitsperlong.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += delay.h
generic-y += div64.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index cfc9180..5df9e2d 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -2,7 +2,6 @@
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += exec.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index ba35c41..8cc9470 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -4,7 +4,6 @@ header-y += ../arch/
generic-y += bug.h
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += errno.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 904f3eb..c67fc16 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += barrier.h
generic-y += bug.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += delay.h
generic-y += device.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 256c45b..5d51ade 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -4,7 +4,6 @@ generic-y += auxvec.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 2cfed17..3c95e84 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -11,7 +11,6 @@ genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += dma-contiguous.h
generic-y += early_ioremap.h
generic-y += mcs_spinlock.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 28cf4c5..3073fb4 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += bitsperlong.h
generic-y += bug.h
generic-y += clkdev.h
-generic-y += cputime.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += errno.h
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
deleted file mode 100644
index 358e547..0000000
--- a/include/asm-generic/cputime.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _ASM_GENERIC_CPUTIME_H
-#define _ASM_GENERIC_CPUTIME_H
-
-#include <linux/time.h>
-#include <linux/jiffies.h>
-
-#endif
diff --git a/include/linux/cputime.h b/include/linux/cputime.h
index a257d66..a691dc4 100644
--- a/include/linux/cputime.h
+++ b/include/linux/cputime.h
@@ -1,6 +1,7 @@
#ifndef __LINUX_CPUTIME_H
#define __LINUX_CPUTIME_H
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm/cputime.h>
#ifndef cputime_to_nsecs
@@ -8,4 +9,5 @@
(cputime_to_usecs(__ct) * NSEC_PER_USEC)
#endif
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __LINUX_CPUTIME_H */
--
2.7.4
This cputime_t implementation is now unused, so we can remove it.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/asm-generic/cputime.h | 4 --
include/asm-generic/cputime_jiffies.h | 75 -----------------------------------
2 files changed, 79 deletions(-)
delete mode 100644 include/asm-generic/cputime_jiffies.h
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 5196943..8a624b4 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -4,10 +4,6 @@
#include <linux/time.h>
#include <linux/jiffies.h>
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-# include <asm-generic/cputime_jiffies.h>
-#endif
-
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# include <asm-generic/cputime_nsecs.h>
#endif
diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h
deleted file mode 100644
index 6bb8cd4..0000000
--- a/include/asm-generic/cputime_jiffies.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H
-#define _ASM_GENERIC_CPUTIME_JIFFIES_H
-
-typedef unsigned long __nocast cputime_t;
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)
-
-#define cputime_one_jiffy jiffies_to_cputime(1)
-#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
-#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
-
-typedef u64 __nocast cputime64_t;
-
-#define cputime64_to_jiffies64(__ct) (__force u64)(__ct)
-#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif)
-
-
-/*
- * Convert nanoseconds <-> cputime
- */
-#define cputime_to_nsecs(__ct) \
- jiffies_to_nsecs(cputime_to_jiffies(__ct))
-#define nsecs_to_cputime64(__nsec) \
- jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec))
-#define nsecs_to_cputime(__nsec) \
- jiffies_to_cputime(nsecs_to_jiffies(__nsec))
-
-
-/*
- * Convert cputime to microseconds and back.
- */
-#define cputime_to_usecs(__ct) \
- jiffies_to_usecs(cputime_to_jiffies(__ct))
-#define usecs_to_cputime(__usec) \
- jiffies_to_cputime(usecs_to_jiffies(__usec))
-#define usecs_to_cputime64(__usec) \
- jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000))
-
-/*
- * Convert cputime to seconds and back.
- */
-#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ)
-#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ)
-
-/*
- * Convert cputime to timespec and back.
- */
-#define timespec_to_cputime(__val) \
- jiffies_to_cputime(timespec_to_jiffies(__val))
-#define cputime_to_timespec(__ct,__val) \
- jiffies_to_timespec(cputime_to_jiffies(__ct),__val)
-
-/*
- * Convert cputime to timeval and back.
- */
-#define timeval_to_cputime(__val) \
- jiffies_to_cputime(timeval_to_jiffies(__val))
-#define cputime_to_timeval(__ct,__val) \
- jiffies_to_timeval(cputime_to_jiffies(__ct),__val)
-
-/*
- * Convert cputime to clock and back.
- */
-#define cputime_to_clock_t(__ct) \
- jiffies_to_clock_t(cputime_to_jiffies(__ct))
-#define clock_t_to_cputime(__x) \
- jiffies_to_cputime(clock_t_to_jiffies(__x))
-
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct) \
- jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct))
-
-#endif
--
2.7.4
It's unused now.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/cputime.h | 5 -----
1 file changed, 5 deletions(-)
diff --git a/include/linux/cputime.h b/include/linux/cputime.h
index f2eb2ee..a257d66 100644
--- a/include/linux/cputime.h
+++ b/include/linux/cputime.h
@@ -8,9 +8,4 @@
(cputime_to_usecs(__ct) * NSEC_PER_USEC)
#endif
-#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs) \
- usecs_to_cputime((__nsecs) / NSEC_PER_USEC)
-#endif
-
#endif /* __LINUX_CPUTIME_H */
--
2.7.4
Since the core doesn't deal with cputime_t anymore, most of these APIs
have been left unused. Let's remove them.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/s390/include/asm/cputime.h | 107 ----------------------------------------
1 file changed, 107 deletions(-)
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 9a94481..d1c407d 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -25,33 +25,6 @@ static inline unsigned long __div(unsigned long long n, unsigned long base)
return n / base;
}
-#define cputime_one_jiffy jiffies_to_cputime(1)
-
-/*
- * Convert cputime to jiffies and back.
- */
-static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
-{
- return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / HZ);
-}
-
-static inline cputime_t jiffies_to_cputime(const unsigned int jif)
-{
- return (__force cputime_t)(jif * (CPUTIME_PER_SEC / HZ));
-}
-
-static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
-{
- unsigned long long jif = (__force unsigned long long) cputime;
- do_div(jif, CPUTIME_PER_SEC / HZ);
- return jif;
-}
-
-static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
-{
- return (__force cputime64_t)(jif * (CPUTIME_PER_SEC / HZ));
-}
-
/*
* Convert cputime to microseconds and back.
*/
@@ -60,86 +33,6 @@ static inline unsigned int cputime_to_usecs(const cputime_t cputime)
return (__force unsigned long long) cputime >> 12;
}
-static inline cputime_t usecs_to_cputime(const unsigned int m)
-{
- return (__force cputime_t)(m * CPUTIME_PER_USEC);
-}
-
-#define usecs_to_cputime64(m) usecs_to_cputime(m)
-
-/*
- * Convert cputime to milliseconds and back.
- */
-static inline unsigned int cputime_to_secs(const cputime_t cputime)
-{
- return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / 2) >> 1;
-}
-
-static inline cputime_t secs_to_cputime(const unsigned int s)
-{
- return (__force cputime_t)(s * CPUTIME_PER_SEC);
-}
-
-/*
- * Convert cputime to timespec and back.
- */
-static inline cputime_t timespec_to_cputime(const struct timespec *value)
-{
- unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
- return (__force cputime_t)(ret + __div(value->tv_nsec * CPUTIME_PER_USEC, NSEC_PER_USEC));
-}
-
-static inline void cputime_to_timespec(const cputime_t cputime,
- struct timespec *value)
-{
- unsigned long long __cputime = (__force unsigned long long) cputime;
- value->tv_nsec = (__cputime % CPUTIME_PER_SEC) * NSEC_PER_USEC / CPUTIME_PER_USEC;
- value->tv_sec = __cputime / CPUTIME_PER_SEC;
-}
-
-/*
- * Convert cputime to timeval and back.
- * Since cputime and timeval have the same resolution (microseconds)
- * this is easy.
- */
-static inline cputime_t timeval_to_cputime(const struct timeval *value)
-{
- unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
- return (__force cputime_t)(ret + value->tv_usec * CPUTIME_PER_USEC);
-}
-
-static inline void cputime_to_timeval(const cputime_t cputime,
- struct timeval *value)
-{
- unsigned long long __cputime = (__force unsigned long long) cputime;
- value->tv_usec = (__cputime % CPUTIME_PER_SEC) / CPUTIME_PER_USEC;
- value->tv_sec = __cputime / CPUTIME_PER_SEC;
-}
-
-/*
- * Convert cputime to clock and back.
- */
-static inline clock_t cputime_to_clock_t(cputime_t cputime)
-{
- unsigned long long clock = (__force unsigned long long) cputime;
- do_div(clock, CPUTIME_PER_SEC / USER_HZ);
- return clock;
-}
-
-static inline cputime_t clock_t_to_cputime(unsigned long x)
-{
- return (__force cputime_t)(x * (CPUTIME_PER_SEC / USER_HZ));
-}
-
-/*
- * Convert cputime64 to clock.
- */
-static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
-{
- unsigned long long clock = (__force unsigned long long) cputime;
- do_div(clock, CPUTIME_PER_SEC / USER_HZ);
- return clock;
-}
u64 arch_cpu_idle_time(int cpu);
--
2.7.4
Since the core doesn't deal with cputime_t anymore, most of these APIs
have been left unused. Let's remove them.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/powerpc/include/asm/cputime.h | 173 -------------------------------------
arch/powerpc/kernel/time.c | 20 +----
2 files changed, 2 insertions(+), 191 deletions(-)
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index aa2e6a3..6ec0ba6 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -18,9 +18,6 @@
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <asm-generic/cputime.h>
-#ifdef __KERNEL__
-static inline void setup_cputime_one_jiffy(void) { }
-#endif
#else
#include <linux/types.h>
@@ -36,65 +33,6 @@ typedef u64 __nocast cputime64_t;
#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)
#ifdef __KERNEL__
-
-/*
- * One jiffy in timebase units computed during initialization
- */
-extern cputime_t cputime_one_jiffy;
-
-/*
- * Convert cputime <-> jiffies
- */
-extern u64 __cputime_jiffies_factor;
-
-static inline unsigned long cputime_to_jiffies(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_jiffies_factor);
-}
-
-static inline cputime_t jiffies_to_cputime(const unsigned long jif)
-{
- u64 ct;
- unsigned long sec;
-
- /* have to be a little careful about overflow */
- ct = jif % HZ;
- sec = jif / HZ;
- if (ct) {
- ct *= tb_ticks_per_sec;
- do_div(ct, HZ);
- }
- if (sec)
- ct += (cputime_t) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
-}
-
-static inline void setup_cputime_one_jiffy(void)
-{
- cputime_one_jiffy = jiffies_to_cputime(1);
-}
-
-static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
-{
- u64 ct;
- u64 sec = jif;
-
- /* have to be a little careful about overflow */
- ct = do_div(sec, HZ);
- if (ct) {
- ct *= tb_ticks_per_sec;
- do_div(ct, HZ);
- }
- if (sec)
- ct += (u64) sec * tb_ticks_per_sec;
- return (__force cputime64_t) ct;
-}
-
-static inline u64 cputime64_to_jiffies64(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_jiffies_factor);
-}
-
/*
* Convert cputime <-> microseconds
*/
@@ -105,117 +43,6 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct)
return mulhdu((__force u64) ct, __cputime_usec_factor);
}
-static inline cputime_t usecs_to_cputime(const unsigned long us)
-{
- u64 ct;
- unsigned long sec;
-
- /* have to be a little careful about overflow */
- ct = us % 1000000;
- sec = us / 1000000;
- if (ct) {
- ct *= tb_ticks_per_sec;
- do_div(ct, 1000000);
- }
- if (sec)
- ct += (cputime_t) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
-}
-
-#define usecs_to_cputime64(us) usecs_to_cputime(us)
-
-/*
- * Convert cputime <-> seconds
- */
-extern u64 __cputime_sec_factor;
-
-static inline unsigned long cputime_to_secs(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_sec_factor);
-}
-
-static inline cputime_t secs_to_cputime(const unsigned long sec)
-{
- return (__force cputime_t)((u64) sec * tb_ticks_per_sec);
-}
-
-/*
- * Convert cputime <-> timespec
- */
-static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
-{
- u64 x = (__force u64) ct;
- unsigned int frac;
-
- frac = do_div(x, tb_ticks_per_sec);
- p->tv_sec = x;
- x = (u64) frac * 1000000000;
- do_div(x, tb_ticks_per_sec);
- p->tv_nsec = x;
-}
-
-static inline cputime_t timespec_to_cputime(const struct timespec *p)
-{
- u64 ct;
-
- ct = (u64) p->tv_nsec * tb_ticks_per_sec;
- do_div(ct, 1000000000);
- return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
-}
-
-/*
- * Convert cputime <-> timeval
- */
-static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
-{
- u64 x = (__force u64) ct;
- unsigned int frac;
-
- frac = do_div(x, tb_ticks_per_sec);
- p->tv_sec = x;
- x = (u64) frac * 1000000;
- do_div(x, tb_ticks_per_sec);
- p->tv_usec = x;
-}
-
-static inline cputime_t timeval_to_cputime(const struct timeval *p)
-{
- u64 ct;
-
- ct = (u64) p->tv_usec * tb_ticks_per_sec;
- do_div(ct, 1000000);
- return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
-}
-
-/*
- * Convert cputime <-> clock_t (units of 1/USER_HZ seconds)
- */
-extern u64 __cputime_clockt_factor;
-
-static inline unsigned long cputime_to_clock_t(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_clockt_factor);
-}
-
-static inline cputime_t clock_t_to_cputime(const unsigned long clk)
-{
- u64 ct;
- unsigned long sec;
-
- /* have to be a little careful about overflow */
- ct = clk % USER_HZ;
- sec = clk / USER_HZ;
- if (ct) {
- ct *= tb_ticks_per_sec;
- do_div(ct, USER_HZ);
- }
- if (sec)
- ct += (u64) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
-}
-
-#define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct))
-
/*
* PPC64 uses PACA which is task independent for storing accounting data while
* PPC32 uses struct thread_info, therefore at task switch the accounting data
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b29822d..ad31f3f 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -152,20 +152,11 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
- * Factors for converting from cputime_t (timebase ticks) to
- * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
- * These are all stored as 0.64 fixed-point binary fractions.
+ * Factor for converting from cputime_t (timebase ticks) to
+ * microseconds. This is stored as 0.64 fixed-point binary fraction.
*/
-u64 __cputime_jiffies_factor;
-EXPORT_SYMBOL(__cputime_jiffies_factor);
u64 __cputime_usec_factor;
EXPORT_SYMBOL(__cputime_usec_factor);
-u64 __cputime_sec_factor;
-EXPORT_SYMBOL(__cputime_sec_factor);
-u64 __cputime_clockt_factor;
-EXPORT_SYMBOL(__cputime_clockt_factor);
-
-cputime_t cputime_one_jiffy;
#ifdef CONFIG_PPC_SPLPAR
void (*dtl_consumer)(struct dtl_entry *, u64);
@@ -181,14 +172,8 @@ static void calc_cputime_factors(void)
{
struct div_result res;
- div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
- __cputime_jiffies_factor = res.result_low;
div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
__cputime_usec_factor = res.result_low;
- div128_by_32(1, 0, tb_ticks_per_sec, &res);
- __cputime_sec_factor = res.result_low;
- div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
- __cputime_clockt_factor = res.result_low;
}
/*
@@ -1019,7 +1004,6 @@ void __init time_init(void)
tb_ticks_per_sec = ppc_tb_freq;
tb_ticks_per_usec = ppc_tb_freq / 1000000;
calc_cputime_factors();
- setup_cputime_one_jiffy();
/*
* Compute scale factor for sched_clock.
--
2.7.4
This way we don't need to deal with cputime_t details from the core code.
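Concretely, arch_cpu_idle_time() now returns nanoseconds, so its
/proc/stat consumers can accumulate it directly into the already
nsec-based kcpustat values. A sketch mirroring the fs/proc/stat.c hunk
below:

static u64 get_idle_time_sketch(int cpu)
{
	u64 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];

	if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
		idle += arch_idle_time(cpu);	/* already in nsecs */

	return idle;
}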
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/s390/include/asm/cputime.h | 2 +-
arch/s390/kernel/idle.c | 5 +++--
fs/proc/stat.c | 4 ++--
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 221b454..9a94481 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -141,7 +141,7 @@ static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
return clock;
}
-cputime64_t arch_cpu_idle_time(int cpu);
+u64 arch_cpu_idle_time(int cpu);
#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 99f1d81..8c081ca 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -84,7 +84,7 @@ static ssize_t show_idle_time(struct device *dev,
}
DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
-cputime64_t arch_cpu_idle_time(int cpu)
+u64 arch_cpu_idle_time(int cpu)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
unsigned long long now, idle_enter, idle_exit;
@@ -96,7 +96,8 @@ cputime64_t arch_cpu_idle_time(int cpu)
idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
- return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
+
+ return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
}
void arch_cpu_idle_enter(void)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 44475a4..e47c3e8 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,7 +27,7 @@ static u64 get_idle_time(int cpu)
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
- idle += cputime_to_nsecs(arch_idle_time(cpu));
+ idle += arch_idle_time(cpu);
return idle;
}
@@ -37,7 +37,7 @@ static u64 get_iowait_time(int cpu)
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
if (cpu_online(cpu) && nr_iowait_cpu(cpu))
- iowait += cputime_to_nsecs(arch_idle_time(cpu));
+ iowait += arch_idle_time(cpu);
return iowait;
}
--
2.7.4
This nsec based cputime_t implementation isn't used anymore. We can
remove it.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/ia64/include/asm/cputime.h | 106 ----------------------------------------
1 file changed, 106 deletions(-)
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index fee773c..44bcffc 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -20,113 +20,7 @@
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
# include <asm-generic/cputime.h>
-
#else
-
-#include <asm/processor.h>
-#include <linux/math64.h>
-
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
-
-#define cputime_one_jiffy jiffies_to_cputime(1)
-
-#define cputime_div(__ct, divisor) div_u64((__force u64)__ct, divisor)
-#define cputime_div_rem(__ct, divisor, remainder) \
- div_u64_rem((__force u64)__ct, divisor, remainder);
-
-/*
- * Convert cputime <-> jiffies (HZ)
- */
-#define cputime_to_jiffies(__ct) \
- cputime_div(__ct, NSEC_PER_SEC / HZ)
-#define jiffies_to_cputime(__jif) \
- (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct) \
- cputime_div(__ct, NSEC_PER_SEC / HZ)
-#define jiffies64_to_cputime64(__jif) \
- (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
-
-
-/*
- * Convert cputime <-> nanoseconds
- */
-#define cputime_to_nsecs(__ct) \
- (__force u64)(__ct)
-#define nsecs_to_cputime(__nsecs) \
- (__force cputime_t)(__nsecs)
-#define nsecs_to_cputime64(__nsecs) \
- (__force cputime64_t)(__nsecs)
-
-
-/*
- * Convert cputime <-> microseconds
- */
-#define cputime_to_usecs(__ct) \
- cputime_div(__ct, NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs) \
- (__force cputime_t)((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs) \
- (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
-
-/*
- * Convert cputime <-> seconds
- */
-#define cputime_to_secs(__ct) \
- cputime_div(__ct, NSEC_PER_SEC)
-#define secs_to_cputime(__secs) \
- (__force cputime_t)((__secs) * NSEC_PER_SEC)
-
-/*
- * Convert cputime <-> timespec (nsec)
- */
-static inline cputime_t timespec_to_cputime(const struct timespec *val)
-{
- u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
- return (__force cputime_t) ret;
-}
-static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
-{
- u32 rem;
-
- val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem);
- val->tv_nsec = rem;
-}
-
-/*
- * Convert cputime <-> timeval (msec)
- */
-static inline cputime_t timeval_to_cputime(const struct timeval *val)
-{
- u64 ret = (u64)val->tv_sec * NSEC_PER_SEC +
- val->tv_usec * NSEC_PER_USEC;
- return (__force cputime_t) ret;
-}
-static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
-{
- u32 rem;
-
- val->tv_sec = cputime_div_rem(ct, NSEC_PER_SEC, &rem);
- val->tv_usec = rem / NSEC_PER_USEC;
-}
-
-/*
- * Convert cputime <-> clock (USER_HZ)
- */
-#define cputime_to_clock_t(__ct) \
- cputime_div(__ct, (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x) \
- (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
-
-/*
- * Convert cputime64 to clock.
- */
-#define cputime64_to_clock_t(__ct) \
- cputime_to_clock_t((__force cputime_t)__ct)
-
-
extern void arch_vtime_task_switch(struct task_struct *tsk);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
--
2.7.4
This is the final step of the tick based cputime conversion. Now that the
whole cputime accounting engine accounts in nsecs, we can convert the
very source of the accounted cputime to nsecs.
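The arithmetic becomes plain nsec math. A standalone demo of the tick
path (all numbers hypothetical: HZ=250, so TICK_NSEC is 4000000 ns; the
kernel's TICK_NSEC is the same value modulo rounding):

	/* Standalone demo of the tick -> nsec accounting arithmetic. */
	#include <stdio.h>
	#include <stdint.h>

	#define NSEC_PER_SEC	1000000000ULL
	#define HZ		250ULL			/* example config */
	#define TICK_NSEC	(NSEC_PER_SEC / HZ)	/* 4000000 ns per tick */

	int main(void)
	{
		uint64_t ticks = 3;
		uint64_t cputime = TICK_NSEC * ticks;	/* what the tick path accounts */
		uint64_t steal = 5000000;		/* made-up stolen nsecs */

		if (steal < cputime)
			cputime -= steal;		/* same clamping as the patch */
		else
			cputime = 0;

		printf("accounted %llu ns\n", (unsigned long long)cputime);
		return 0;
	}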
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/sched/cputime.c | 52 +++++++++++++++++++++-----------------------------
1 file changed, 22 insertions(+), 30 deletions(-)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8d541e7..8d9fac7 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -74,14 +74,13 @@ void irqtime_account_irq(struct task_struct *curr)
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
-static cputime_t irqtime_tick_accounted(cputime_t maxtime)
+static u64 irqtime_tick_accounted(u64 maxtime)
{
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
- cputime_t delta;
+ u64 delta;
- delta = nsecs_to_cputime(irqtime->tick_delta);
- delta = min(delta, maxtime);
- irqtime->tick_delta -= cputime_to_nsecs(delta);
+ delta = min(irqtime->tick_delta, maxtime);
+ irqtime->tick_delta -= delta;
return delta;
}
@@ -90,7 +89,7 @@ static cputime_t irqtime_tick_accounted(cputime_t maxtime)
#define sched_clock_irqtime (0)
-static cputime_t irqtime_tick_accounted(cputime_t dummy)
+static u64 irqtime_tick_accounted(u64 dummy)
{
return 0;
}
@@ -233,22 +232,19 @@ void account_idle_time(u64 cputime)
* ticks are not redelivered later. Due to that, this function may on
* occasion account more time than the calling functions think elapsed.
*/
-static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
+static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
if (static_key_false(&paravirt_steal_enabled)) {
- cputime_t steal_cputime;
- u64 steal, rounded;
+ u64 steal;
steal = paravirt_steal_clock(smp_processor_id());
steal -= this_rq()->prev_steal_time;
+ steal = min(steal, maxtime);
+ account_steal_time(steal);
+ this_rq()->prev_steal_time += steal;
- steal_cputime = min(nsecs_to_cputime(steal), maxtime);
- rounded = cputime_to_nsecs(steal_cputime);
- account_steal_time(rounded);
- this_rq()->prev_steal_time += rounded;
-
- return steal_cputime;
+ return steal;
}
#endif
return 0;
@@ -257,9 +253,9 @@ static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
/*
* Account how much elapsed time was spent in steal, irq, or softirq time.
*/
-static inline cputime_t account_other_time(cputime_t max)
+static inline u64 account_other_time(u64 max)
{
- cputime_t accounted;
+ u64 accounted;
/* Shall be converted to a lockdep-enabled lightweight check */
WARN_ON_ONCE(!irqs_disabled());
@@ -363,9 +359,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq, int ticks)
{
- u64 old_cputime = (__force u64) cputime_one_jiffy * ticks;
- cputime_t other;
- u64 cputime;
+ u64 other, cputime = TICK_NSEC * ticks;
/*
* When returning from idle, many ticks can get accounted at
@@ -375,11 +369,10 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
* other time can exceed ticks occasionally.
*/
other = account_other_time(ULONG_MAX);
- if (other >= old_cputime)
+ if (other >= cputime)
return;
- old_cputime -= other;
- cputime = cputime_to_nsecs(old_cputime);
+ cputime -= other;
if (this_cpu_ksoftirqd() == p) {
/*
@@ -478,8 +471,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
- cputime_t old_cputime, steal;
- u64 cputime;
+ u64 cputime, steal;
struct rq *rq = this_rq();
if (vtime_accounting_cpu_enabled())
@@ -490,14 +482,13 @@ void account_process_tick(struct task_struct *p, int user_tick)
return;
}
- old_cputime = cputime_one_jiffy;
+ cputime = TICK_NSEC;
steal = steal_account_process_time(ULONG_MAX);
- if (steal >= old_cputime)
+ if (steal >= cputime)
return;
- old_cputime -= steal;
- cputime = cputime_to_nsecs(old_cputime);
+ cputime -= steal;
if (user_tick)
account_user_time(p, cputime);
@@ -521,7 +512,7 @@ void account_idle_ticks(unsigned long ticks)
}
cputime = ticks * TICK_NSEC;
- steal = cputime_to_nsecs(steal_account_process_time(ULONG_MAX));
+ steal = steal_account_process_time(ULONG_MAX);
if (steal >= cputime)
return;
@@ -742,6 +733,7 @@ void vtime_account_user(struct task_struct *tsk)
write_seqcount_begin(&tsk->vtime_seqcount);
tsk->vtime_snap_whence = VTIME_SYS;
if (vtime_delta(tsk)) {
+ u64 nsecs;
delta_cpu = get_vtime_delta(tsk);
account_user_time(tsk, cputime_to_nsecs(delta_cpu));
}
--
2.7.4
Make the full dynticks cputime clock source return nsecs while keeping
its internals jiffies based, for performance reasons.
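The shape of the resulting clock source, condensed from the diff below
(vtime_delta_ns() is a hypothetical name used only for illustration):

	/*
	 * Sketch: snapshots stay in jiffies, which are cheap to maintain
	 * on every context switch; only the delta handed to the
	 * accounting core is scaled to nsecs.
	 */
	static u64 vtime_delta_ns(unsigned long snap)
	{
		unsigned long now = READ_ONCE(jiffies);

		if (time_before(now, snap))
			return 0;
		return jiffies_to_nsecs(now - snap);
	}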
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/sched/cputime.c | 31 ++++++++++++-------------------
1 file changed, 12 insertions(+), 19 deletions(-)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 8d9fac7..737a22b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -679,20 +679,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static cputime_t vtime_delta(struct task_struct *tsk)
+static u64 vtime_delta(struct task_struct *tsk)
{
unsigned long now = READ_ONCE(jiffies);
if (time_before(now, (unsigned long)tsk->vtime_snap))
return 0;
- return jiffies_to_cputime(now - tsk->vtime_snap);
+ return jiffies_to_nsecs(now - tsk->vtime_snap);
}
-static cputime_t get_vtime_delta(struct task_struct *tsk)
+static u64 get_vtime_delta(struct task_struct *tsk)
{
unsigned long now = READ_ONCE(jiffies);
- cputime_t delta, other;
+ u64 delta, other;
/*
* Unlike tick based timing, vtime based timing never has lost
@@ -701,7 +701,7 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
* elapsed time. Limit account_other_time to prevent rounding
* errors from causing elapsed vtime to go negative.
*/
- delta = jiffies_to_cputime(now - tsk->vtime_snap);
+ delta = jiffies_to_nsecs(now - tsk->vtime_snap);
other = account_other_time(delta);
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
tsk->vtime_snap = now;
@@ -711,9 +711,7 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
static void __vtime_account_system(struct task_struct *tsk)
{
- cputime_t delta_cpu = get_vtime_delta(tsk);
-
- account_system_time(tsk, irq_count(), cputime_to_nsecs(delta_cpu));
+ account_system_time(tsk, irq_count(), get_vtime_delta(tsk));
}
void vtime_account_system(struct task_struct *tsk)
@@ -728,15 +726,12 @@ void vtime_account_system(struct task_struct *tsk)
void vtime_account_user(struct task_struct *tsk)
{
- cputime_t delta_cpu;
+ u64 delta_cpu;
write_seqcount_begin(&tsk->vtime_seqcount);
tsk->vtime_snap_whence = VTIME_SYS;
- if (vtime_delta(tsk)) {
- u64 nsecs;
- delta_cpu = get_vtime_delta(tsk);
- account_user_time(tsk, cputime_to_nsecs(delta_cpu));
- }
+ if (vtime_delta(tsk))
+ account_user_time(tsk, get_vtime_delta(tsk));
write_seqcount_end(&tsk->vtime_seqcount);
}
@@ -777,9 +772,7 @@ EXPORT_SYMBOL_GPL(vtime_guest_exit);
void vtime_account_idle(struct task_struct *tsk)
{
- cputime_t delta_cpu = get_vtime_delta(tsk);
-
- account_idle_time(cputime_to_nsecs(delta_cpu));
+ account_idle_time(get_vtime_delta(tsk));
}
void arch_vtime_task_switch(struct task_struct *prev)
@@ -819,7 +812,7 @@ u64 task_gtime(struct task_struct *t)
gtime = t->gtime;
if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
- gtime += cputime_to_nsecs(vtime_delta(t));
+ gtime += vtime_delta(t);
} while (read_seqcount_retry(&t->vtime_seqcount, seq));
@@ -852,7 +845,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
continue;
- delta = cputime_to_nsecs(vtime_delta(t));
+ delta = vtime_delta(t);
/*
* Task runs either in user or kernel space, add pending nohz time to
--
2.7.4
This is one more step toward converting cputime accounting to pure nsecs.
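The call-site pattern this leaves behind on architectures still carrying
cputime_t internally, condensed from the diffs below:

	/* Sketch: the conversion moves out to the arch boundary. */
	account_idle_time(cputime_to_nsecs(delta_cpu));	/* core takes u64 nsecs */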
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/ia64/kernel/time.c | 2 +-
arch/powerpc/kernel/time.c | 2 +-
arch/s390/kernel/idle.c | 2 +-
include/linux/kernel_stat.h | 2 +-
kernel/sched/cputime.c | 18 +++++++++---------
5 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index ce6bf09..0287ffe 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -119,7 +119,7 @@ EXPORT_SYMBOL_GPL(vtime_account_system);
void vtime_account_idle(struct task_struct *tsk)
{
- account_idle_time(vtime_delta(tsk));
+ account_idle_time(cputime_to_nsecs(vtime_delta(tsk)));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 3efa5a6..7cd3b3c 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -370,7 +370,7 @@ void vtime_account_idle(struct task_struct *tsk)
unsigned long delta, sys_scaled, stolen;
delta = vtime_delta(tsk, &sys_scaled, &stolen);
- account_idle_time(delta + stolen);
+ account_idle_time(cputime_to_nsecs(delta + stolen));
}
/*
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 7a55c29..99f1d81 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -43,7 +43,7 @@ void enabled_wait(void)
idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
idle->idle_time += idle_time;
idle->idle_count++;
- account_idle_time(idle_time);
+ account_idle_time(cputime_to_nsecs(idle_time));
write_seqcount_end(&idle->seqcount);
}
NOKPROBE_SYMBOL(enabled_wait);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 8355f8e..07b103e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -81,7 +81,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
extern void account_user_time(struct task_struct *, u64);
extern void account_system_time(struct task_struct *, int, cputime_t);
extern void account_steal_time(u64);
-extern void account_idle_time(cputime_t);
+extern void account_idle_time(u64);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index e6990a4..ac9aa7e 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -218,15 +218,15 @@ void account_steal_time(u64 cputime)
* Account for idle time.
* @cputime: the cpu time spent in idle wait
*/
-void account_idle_time(cputime_t cputime)
+void account_idle_time(u64 cputime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
struct rq *rq = this_rq();
if (atomic_read(&rq->nr_iowait) > 0)
- cpustat[CPUTIME_IOWAIT] += cputime_to_nsecs(cputime);
+ cpustat[CPUTIME_IOWAIT] += cputime;
else
- cpustat[CPUTIME_IDLE] += cputime_to_nsecs(cputime);
+ cpustat[CPUTIME_IDLE] += cputime;
}
/*
@@ -392,7 +392,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
} else if (user_tick) {
account_user_time(p, cputime);
} else if (p == rq->idle) {
- account_idle_time(old_cputime);
+ account_idle_time(cputime);
} else if (p->flags & PF_VCPU) { /* System time or guest time */
account_guest_time(p, old_cputime);
@@ -506,7 +506,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
account_system_time(p, HARDIRQ_OFFSET, old_cputime);
else
- account_idle_time(old_cputime);
+ account_idle_time(cputime);
}
/*
@@ -515,15 +515,15 @@ void account_process_tick(struct task_struct *p, int user_tick)
*/
void account_idle_ticks(unsigned long ticks)
{
- cputime_t cputime, steal;
+ u64 cputime, steal;
if (sched_clock_irqtime) {
irqtime_account_idle_ticks(ticks);
return;
}
- cputime = jiffies_to_cputime(ticks);
- steal = steal_account_process_time(ULONG_MAX);
+ cputime = ticks * TICK_NSEC;
+ steal = cputime_to_nsecs(steal_account_process_time(ULONG_MAX));
if (steal >= cputime)
return;
@@ -789,7 +789,7 @@ void vtime_account_idle(struct task_struct *tsk)
{
cputime_t delta_cpu = get_vtime_delta(tsk);
- account_idle_time(delta_cpu);
+ account_idle_time(cputime_to_nsecs(delta_cpu));
}
void arch_vtime_task_switch(struct task_struct *prev)
--
2.7.4
This is one more step toward converting cputime accounting to pure nsecs.
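One detail of the transitional steal path below is worth spelling out:
the clamp still happens in cputime_t granularity, so the value added to
prev_steal_time must be the converted ("rounded") clamp result rather
than the raw nsec delta, otherwise the accounted total and the saved
offset would drift apart. Condensed, with the names from the diff:

	/* Sketch of the transitional steal path. */
	steal_cputime = min(nsecs_to_cputime(steal), maxtime);	/* clamp in cputime_t */
	rounded = cputime_to_nsecs(steal_cputime);	/* what gets accounted */
	account_steal_time(rounded);
	this_rq()->prev_steal_time += rounded;		/* keep both in sync */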
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/powerpc/kernel/time.c | 2 +-
arch/s390/kernel/vtime.c | 2 +-
include/linux/kernel_stat.h | 2 +-
kernel/sched/cputime.c | 11 ++++++-----
4 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 9b372b1..3efa5a6 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -361,7 +361,7 @@ void vtime_account_system(struct task_struct *tsk)
account_system_time(tsk, 0, delta);
tsk->stimescaled += cputime_to_nsecs(sys_scaled);
if (stolen)
- account_steal_time(stolen);
+ account_steal_time(cputime_to_nsecs(stolen));
}
EXPORT_SYMBOL_GPL(vtime_account_system);
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 0ac0d4c..02c3970 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -145,7 +145,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
steal = S390_lowcore.steal_timer;
if ((s64) steal > 0) {
S390_lowcore.steal_timer = 0;
- account_steal_time(steal);
+ account_steal_time(cputime_to_nsecs(steal));
}
return virt_timer_forward(user + system);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 6831734..8355f8e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -80,7 +80,7 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
extern void account_user_time(struct task_struct *, u64);
extern void account_system_time(struct task_struct *, int, cputime_t);
-extern void account_steal_time(cputime_t);
+extern void account_steal_time(u64);
extern void account_idle_time(cputime_t);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index f5318d9..e6990a4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -207,11 +207,11 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
* Account for involuntary wait time.
* @cputime: the cpu time spent in involuntary wait
*/
-void account_steal_time(cputime_t cputime)
+void account_steal_time(u64 cputime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
- cpustat[CPUTIME_STEAL] += cputime_to_nsecs(cputime);
+ cpustat[CPUTIME_STEAL] += cputime;
}
/*
@@ -239,14 +239,15 @@ static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
#ifdef CONFIG_PARAVIRT
if (static_key_false(&paravirt_steal_enabled)) {
cputime_t steal_cputime;
- u64 steal;
+ u64 steal, rounded;
steal = paravirt_steal_clock(smp_processor_id());
steal -= this_rq()->prev_steal_time;
steal_cputime = min(nsecs_to_cputime(steal), maxtime);
- account_steal_time(steal_cputime);
- this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);
+ rounded = cputime_to_nsecs(steal_cputime);
+ account_steal_time(rounded);
+ this_rq()->prev_steal_time += rounded;
return steal_cputime;
}
--
2.7.4
Now that the whole cputime conversion to nsec units is complete, we
can remove the compatibility accessors.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/sched.h | 40 ----------------------------------------
1 file changed, 40 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a5081ff..5541090 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -29,7 +29,6 @@ struct sched_param {
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <linux/cputime.h>
#include <linux/smp.h>
#include <linux/sem.h>
@@ -611,13 +610,6 @@ struct task_cputime {
unsigned long long sum_exec_runtime;
};
-/* Temporary type to ease cputime_t to nsecs conversion */
-struct task_cputime_t {
- cputime_t utime;
- cputime_t stime;
- unsigned long long sum_exec_runtime;
-};
-
/* Alternate field names when used to cache expirations. */
#define virt_exp utime
#define prof_exp stime
@@ -2273,27 +2265,6 @@ static inline void task_cputime_scaled(struct task_struct *t,
}
#endif
-static inline void task_cputime_t(struct task_struct *t,
- cputime_t *utime, cputime_t *stime)
-{
- u64 ut, st;
-
- task_cputime(t, &ut, &st);
- *utime = nsecs_to_cputime(ut);
- *stime = nsecs_to_cputime(st);
-}
-
-static inline void task_cputime_t_scaled(struct task_struct *t,
- cputime_t *utimescaled,
- cputime_t *stimescaled)
-{
- u64 ut, st;
-
- task_cputime_scaled(t, &ut, &st);
- *utimescaled = nsecs_to_cputime(ut);
- *stimescaled = nsecs_to_cputime(st);
-}
-
extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
@@ -3498,17 +3469,6 @@ static __always_inline bool need_resched(void)
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
-static inline void thread_group_cputime_t(struct task_struct *tsk,
- struct task_cputime_t *cputime)
-{
- struct task_cputime times;
-
- thread_group_cputime(tsk, &times);
- cputime->utime = nsecs_to_cputime(times.utime);
- cputime->stime = nsecs_to_cputime(times.stime);
- cputime->sum_exec_runtime = times.sum_exec_runtime;
-}
-
/*
* Reevaluate whether the task has signals pending delivery.
* Wake the task if so.
--
2.7.4
Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.
Also convert itimers to use nsec based internal counters. This simplifies
the code and removes the whole error/incr_error game that served to deal
with the random granularity of cputime_t.
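To see what error/incr_error was compensating for, here is a standalone
demo of the old scheme (all numbers hypothetical: HZ=100, so one jiffy is
10 ms, and a requested 25 ms interval rounds up to 3 jiffies):

	/* Standalone demo of the old error/incr_error compensation. */
	#include <stdio.h>
	#include <stdint.h>

	#define TICK_NSEC	10000000ULL	/* 10 ms jiffy, HZ=100 */

	int main(void)
	{
		uint64_t want_ns  = 25000000;	/* 25 ms requested interval */
		uint64_t incr_ns  = 30000000;	/* rounded up to 3 jiffies */
		uint64_t incr_err = incr_ns - want_ns;
		uint64_t expires  = incr_ns, error = 0;

		for (int i = 0; i < 4; i++) {
			expires += incr_ns;	/* rearm on expiry */
			error += incr_err;
			if (error >= TICK_NSEC) { /* pay back one jiffy */
				expires -= TICK_NSEC;
				error -= TICK_NSEC;
			}
			printf("expiry %d: next at %llu ns\n", i + 1,
			       (unsigned long long)expires);
		}
		return 0;
	}

With nsec granularity the stored interval equals the requested one, the
error is always zero, and the whole mechanism can go.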
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/posix-timers.h | 2 +-
include/linux/sched.h | 6 ++--
include/trace/events/timer.h | 26 ++++++++---------
kernel/time/itimer.c | 64 +++++++++++++++---------------------------
kernel/time/posix-cpu-timers.c | 43 +++++++++++-----------------
5 files changed, 55 insertions(+), 86 deletions(-)
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 890de52..64aa189 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -119,7 +119,7 @@ void run_posix_cpu_timers(struct task_struct *task);
void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task);
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
- cputime_t *newval, cputime_t *oldval);
+ u64 *newval, u64 *oldval);
long clock_nanosleep_restart(struct restart_block *restart_block);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 148a0a6..a5081ff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -566,10 +566,8 @@ struct pacct_struct {
};
struct cpu_itimer {
- cputime_t expires;
- cputime_t incr;
- u32 error;
- u32 incr_error;
+ u64 expires;
+ u64 incr;
};
/**
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 28c5da6..c636b51 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -271,17 +271,17 @@ DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
TRACE_EVENT(itimer_state,
TP_PROTO(int which, const struct itimerval *const value,
- cputime_t expires),
+ unsigned long long expires),
TP_ARGS(which, value, expires),
TP_STRUCT__entry(
- __field( int, which )
- __field( cputime_t, expires )
- __field( long, value_sec )
- __field( long, value_usec )
- __field( long, interval_sec )
- __field( long, interval_usec )
+ __field( int, which )
+ __field( unsigned long long, expires )
+ __field( long, value_sec )
+ __field( long, value_usec )
+ __field( long, interval_sec )
+ __field( long, interval_usec )
),
TP_fast_assign(
@@ -294,7 +294,7 @@ TRACE_EVENT(itimer_state,
),
TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld",
- __entry->which, (unsigned long long)__entry->expires,
+ __entry->which, __entry->expires,
__entry->value_sec, __entry->value_usec,
__entry->interval_sec, __entry->interval_usec)
);
@@ -307,14 +307,14 @@ TRACE_EVENT(itimer_state,
*/
TRACE_EVENT(itimer_expire,
- TP_PROTO(int which, struct pid *pid, cputime_t now),
+ TP_PROTO(int which, struct pid *pid, unsigned long long now),
TP_ARGS(which, pid, now),
TP_STRUCT__entry(
- __field( int , which )
- __field( pid_t, pid )
- __field( cputime_t, now )
+ __field( int , which )
+ __field( pid_t, pid )
+ __field( unsigned long long, now )
),
TP_fast_assign(
@@ -324,7 +324,7 @@ TRACE_EVENT(itimer_expire,
),
TP_printk("which=%d pid=%d now=%llu", __entry->which,
- (int) __entry->pid, (unsigned long long)__entry->now)
+ (int) __entry->pid, __entry->now)
);
#ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 20aa205..41ae174 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -45,35 +45,35 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer)
static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
struct itimerval *const value)
{
- cputime_t cval, cinterval;
+ u64 val, interval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
spin_lock_irq(&tsk->sighand->siglock);
- cval = it->expires;
- cinterval = it->incr;
- if (cval) {
+ val = it->expires;
+ interval = it->incr;
+ if (val) {
struct task_cputime cputime;
- cputime_t t;
+ u64 t;
thread_group_cputimer(tsk, &cputime);
if (clock_id == CPUCLOCK_PROF)
- t = nsecs_to_cputime(cputime.utime + cputime.stime);
+ t = cputime.utime + cputime.stime;
else
/* CPUCLOCK_VIRT */
- t = nsecs_to_cputime(cputime.utime);
+ t = cputime.utime;
- if (cval < t)
+ if (val < t)
/* about to fire */
- cval = cputime_one_jiffy;
+ val = TICK_NSEC;
else
- cval = cval - t;
+ val -= t;
}
spin_unlock_irq(&tsk->sighand->siglock);
- cputime_to_timeval(cval, &value->it_value);
- cputime_to_timeval(cinterval, &value->it_interval);
+ value->it_value = ns_to_timeval(val);
+ value->it_interval = ns_to_timeval(interval);
}
int do_getitimer(int which, struct itimerval *value)
@@ -129,55 +129,35 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns)
-{
- struct timespec ts;
- s64 cpu_ns;
-
- cputime_to_timespec(ct, &ts);
- cpu_ns = timespec_to_ns(&ts);
-
- return (cpu_ns <= real_ns) ? 0 : cpu_ns - real_ns;
-}
-
static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
const struct itimerval *const value,
struct itimerval *const ovalue)
{
- cputime_t cval, nval, cinterval, ninterval;
- s64 ns_ninterval, ns_nval;
- u32 error, incr_error;
+ u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
- nval = timeval_to_cputime(&value->it_value);
- ns_nval = timeval_to_ns(&value->it_value);
- ninterval = timeval_to_cputime(&value->it_interval);
- ns_ninterval = timeval_to_ns(&value->it_interval);
-
- error = cputime_sub_ns(nval, ns_nval);
- incr_error = cputime_sub_ns(ninterval, ns_ninterval);
+ nval = timeval_to_ns(&value->it_value);
+ ninterval = timeval_to_ns(&value->it_interval);
spin_lock_irq(&tsk->sighand->siglock);
- cval = it->expires;
- cinterval = it->incr;
- if (cval || nval) {
+ oval = it->expires;
+ ointerval = it->incr;
+ if (oval || nval) {
if (nval > 0)
- nval += cputime_one_jiffy;
- set_process_cpu_timer(tsk, clock_id, &nval, &cval);
+ nval += TICK_NSEC;
+ set_process_cpu_timer(tsk, clock_id, &nval, &oval);
}
it->expires = nval;
it->incr = ninterval;
- it->error = error;
- it->incr_error = incr_error;
trace_itimer_state(clock_id == CPUCLOCK_VIRT ?
ITIMER_VIRTUAL : ITIMER_PROF, value, nval);
spin_unlock_irq(&tsk->sighand->siglock);
if (ovalue) {
- cputime_to_timeval(cval, &ovalue->it_value);
- cputime_to_timeval(cinterval, &ovalue->it_interval);
+ ovalue->it_value = ns_to_timeval(oval);
+ ovalue->it_interval = ns_to_timeval(ointerval);
}
}
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a02f012..2afff0d 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -21,10 +21,10 @@
*/
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
- cputime_t cputime = secs_to_cputime(rlim_new);
+ u64 nsecs = rlim_new * NSEC_PER_SEC;
spin_lock_irq(&task->sighand->siglock);
- set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
+ set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
spin_unlock_irq(&task->sighand->siglock);
}
@@ -864,17 +864,11 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
if (!it->expires)
return;
- if (cur_time >= cputime_to_nsecs(it->expires)) {
- if (it->incr) {
+ if (cur_time >= it->expires) {
+ if (it->incr)
it->expires += it->incr;
- it->error += it->incr_error;
- if (it->error >= TICK_NSEC) {
- it->expires -= cputime_one_jiffy;
- it->error -= TICK_NSEC;
- }
- } else {
+ else
it->expires = 0;
- }
trace_itimer_expire(signo == SIGPROF ?
ITIMER_PROF : ITIMER_VIRTUAL,
@@ -882,9 +876,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
}
- if (it->expires && (!*expires || cputime_to_nsecs(it->expires) < *expires)) {
- *expires = cputime_to_nsecs(it->expires);
- }
+ if (it->expires && (!*expires || it->expires < *expires))
+ *expires = it->expires;
}
/*
@@ -1178,9 +1171,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
* The tsk->sighand->siglock must be held by the caller.
*/
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
- cputime_t *newval, cputime_t *oldval)
+ u64 *newval, u64 *oldval)
{
- u64 now, new;
+ u64 now;
WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now);
@@ -1192,33 +1185,31 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
* it to be absolute.
*/
if (*oldval) {
- if (cputime_to_nsecs(*oldval) <= now) {
+ if (*oldval <= now) {
/* Just about to fire. */
- *oldval = cputime_one_jiffy;
+ *oldval = TICK_NSEC;
} else {
- *oldval -= nsecs_to_cputime(now);
+ *oldval -= now;
}
}
if (!*newval)
return;
- *newval += nsecs_to_cputime(now);
+ *newval += now;
}
- new = cputime_to_nsecs(*newval);
-
/*
* Update expiration cache if we are the earliest timer, or eventually
* RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
*/
switch (clock_idx) {
case CPUCLOCK_PROF:
- if (expires_gt(tsk->signal->cputime_expires.prof_exp, new))
- tsk->signal->cputime_expires.prof_exp = new;
+ if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
+ tsk->signal->cputime_expires.prof_exp = *newval;
break;
case CPUCLOCK_VIRT:
- if (expires_gt(tsk->signal->cputime_expires.virt_exp, new))
- tsk->signal->cputime_expires.virt_exp = new;
+ if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
+ tsk->signal->cputime_expires.virt_exp = *newval;
break;
}
--
2.7.4
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/posix-cpu-timers.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 286c590..9e7a5be 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -894,8 +894,6 @@ static inline void stop_process_timers(struct signal_struct *sig)
tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}
-static u32 onecputick;
-
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
unsigned long long *expires,
unsigned long long cur_time, int signo)
@@ -907,9 +905,9 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
if (it->incr) {
it->expires += it->incr;
it->error += it->incr_error;
- if (it->error >= onecputick) {
+ if (it->error >= TICK_NSEC) {
it->expires -= cputime_one_jiffy;
- it->error -= onecputick;
+ it->error -= TICK_NSEC;
}
} else {
it->expires = 0;
@@ -1480,15 +1478,10 @@ static __init int init_posix_cpu_timers(void)
.clock_get = thread_cpu_clock_get,
.timer_create = thread_cpu_timer_create,
};
- struct timespec ts;
posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
- cputime_to_timespec(cputime_one_jiffy, &ts);
- onecputick = ts.tv_nsec;
- WARN_ON(ts.tv_sec != 0);
-
return 0;
}
__initcall(init_posix_cpu_timers);
--
2.7.4
Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.
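The nsec_to_clock_t() helper used below does the obvious division. A
standalone sketch (assumes USER_HZ=100 with NSEC_PER_SEC divisible by
USER_HZ; the kernel helper also covers the non-divisible configs):

	/* Standalone sketch of the nsec -> clock_t (USER_HZ) conversion. */
	#include <stdio.h>
	#include <stdint.h>

	#define NSEC_PER_SEC	1000000000ULL
	#define USER_HZ		100ULL

	static uint64_t nsec_to_clock_t_demo(uint64_t nsec)
	{
		return nsec / (NSEC_PER_SEC / USER_HZ);	/* 10 ms per clock tick */
	}

	int main(void)
	{
		/* 1.5 s of cputime = 150 USER_HZ ticks */
		printf("%llu\n",
		       (unsigned long long)nsec_to_clock_t_demo(1500000000ULL));
		return 0;
	}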
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/signal.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/kernel/signal.c b/kernel/signal.c
index 5d5f6f7..2746cf4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1575,7 +1575,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
unsigned long flags;
struct sighand_struct *psig;
bool autoreap = false;
- cputime_t utime, stime;
+ u64 utime, stime;
BUG_ON(sig == -1);
@@ -1613,9 +1613,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
task_uid(tsk));
rcu_read_unlock();
- task_cputime_t(tsk, &utime, &stime);
- info.si_utime = cputime_to_clock_t(utime + nsecs_to_cputime(tsk->signal->utime));
- info.si_stime = cputime_to_clock_t(stime + nsecs_to_cputime(tsk->signal->stime));
+ task_cputime(tsk, &utime, &stime);
+ info.si_utime = nsec_to_clock_t(utime + tsk->signal->utime);
+ info.si_stime = nsec_to_clock_t(stime + tsk->signal->stime);
info.si_status = tsk->exit_code & 0x7f;
if (tsk->exit_code & 0x80)
@@ -1679,7 +1679,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
unsigned long flags;
struct task_struct *parent;
struct sighand_struct *sighand;
- cputime_t utime, stime;
+ u64 utime, stime;
if (for_ptracer) {
parent = tsk->parent;
@@ -1698,9 +1698,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk));
rcu_read_unlock();
- task_cputime_t(tsk, &utime, &stime);
- info.si_utime = cputime_to_clock_t(utime);
- info.si_stime = cputime_to_clock_t(stime);
+ task_cputime(tsk, &utime, &stime);
+ info.si_utime = nsec_to_clock_t(utime);
+ info.si_stime = nsec_to_clock_t(stime);
info.si_code = why;
switch (why) {
--
2.7.4
Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/delayacct.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 228640f..6605496 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -82,19 +82,19 @@ void __delayacct_blkio_end(void)
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
- cputime_t utime, stime, stimescaled, utimescaled;
+ u64 utime, stime, stimescaled, utimescaled;
unsigned long long t2, t3;
unsigned long flags, t1;
s64 tmp;
- task_cputime_t(tsk, &utime, &stime);
+ task_cputime(tsk, &utime, &stime);
tmp = (s64)d->cpu_run_real_total;
- tmp += cputime_to_nsecs(utime + stime);
+ tmp += utime + stime;
d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
- task_cputime_t_scaled(tsk, &utimescaled, &stimescaled);
+ task_cputime_scaled(tsk, &utimescaled, &stimescaled);
tmp = (s64)d->cpu_scaled_run_real_total;
- tmp += cputime_to_nsecs(utimescaled + stimescaled);
+ tmp += utimescaled + stimescaled;
d->cpu_scaled_run_real_total =
(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
--
2.7.4
Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/sched.h | 2 +-
kernel/tsacct.c | 27 ++++++++++++---------------
2 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75f9f4e..394e85d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1800,7 +1800,7 @@ struct task_struct {
#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
- cputime_t acct_timexpd; /* stime + utime since last update */
+ u64 acct_timexpd; /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed; /* Protected by alloc_lock */
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 040d0a6..5c21f05 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -31,7 +31,7 @@ void bacct_add_tsk(struct user_namespace *user_ns,
struct taskstats *stats, struct task_struct *tsk)
{
const struct cred *tcred;
- cputime_t utime, stime, utimescaled, stimescaled;
+ u64 utime, stime, utimescaled, stimescaled;
u64 delta;
BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN);
@@ -66,13 +66,13 @@ void bacct_add_tsk(struct user_namespace *user_ns,
task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0;
rcu_read_unlock();
- task_cputime_t(tsk, &utime, &stime);
- stats->ac_utime = cputime_to_usecs(utime);
- stats->ac_stime = cputime_to_usecs(stime);
+ task_cputime(tsk, &utime, &stime);
+ stats->ac_utime = div_u64(utime, NSEC_PER_USEC);
+ stats->ac_stime = div_u64(stime, NSEC_PER_USEC);
- task_cputime_t_scaled(tsk, &utimescaled, &stimescaled);
- stats->ac_utimescaled = cputime_to_usecs(utimescaled);
- stats->ac_stimescaled = cputime_to_usecs(stimescaled);
+ task_cputime_scaled(tsk, &utimescaled, &stimescaled);
+ stats->ac_utimescaled = div_u64(utimescaled, NSEC_PER_USEC);
+ stats->ac_stimescaled = div_u64(stimescaled, NSEC_PER_USEC);
stats->ac_minflt = tsk->min_flt;
stats->ac_majflt = tsk->maj_flt;
@@ -123,18 +123,15 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
#undef MB
static void __acct_update_integrals(struct task_struct *tsk,
- cputime_t utime, cputime_t stime)
+ u64 utime, u64 stime)
{
- cputime_t time, dtime;
- u64 delta;
+ u64 time, delta;
if (!likely(tsk->mm))
return;
time = stime + utime;
- dtime = time - tsk->acct_timexpd;
- /* Avoid division: cputime_t is often in nanoseconds already. */
- delta = cputime_to_nsecs(dtime);
+ delta = time - tsk->acct_timexpd;
if (delta < TICK_NSEC)
return;
@@ -155,11 +152,11 @@ static void __acct_update_integrals(struct task_struct *tsk,
*/
void acct_update_integrals(struct task_struct *tsk)
{
- cputime_t utime, stime;
+ u64 utime, stime;
unsigned long flags;
local_irq_save(flags);
- task_cputime_t(tsk, &utime, &stime);
+ task_cputime(tsk, &utime, &stime);
__acct_update_integrals(tsk, utime, stime);
local_irq_restore(flags);
}
--
2.7.4
Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.
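ns_to_timeval(), which replaces cputime_to_timeval() in the coredump
paths below, splits nsecs into the familiar sec/usec pair. A simplified
standalone sketch (assumes a non-negative input; the kernel version also
handles negative values):

	/* Standalone sketch of the ns -> timeval split. */
	#include <stdio.h>
	#include <stdint.h>

	struct timeval_demo { long tv_sec; long tv_usec; };

	static struct timeval_demo ns_to_timeval_demo(uint64_t nsec)
	{
		struct timeval_demo tv;

		tv.tv_sec  = nsec / 1000000000ULL;
		tv.tv_usec = (nsec % 1000000000ULL) / 1000ULL;
		return tv;
	}

	int main(void)
	{
		struct timeval_demo tv = ns_to_timeval_demo(2500000000ULL);

		printf("%ld.%06ld\n", tv.tv_sec, tv.tv_usec);	/* 2.500000 */
		return 0;
	}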
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/mips/kernel/binfmt_elfn32.c | 11 -----------
arch/mips/kernel/binfmt_elfo32.c | 11 -----------
arch/parisc/kernel/binfmt_elf32.c | 10 ----------
fs/binfmt_elf.c | 26 ++++++++++----------------
fs/binfmt_elf_fdpic.c | 16 ++++++++--------
fs/compat_binfmt_elf.c | 20 +++++++++++---------
6 files changed, 29 insertions(+), 65 deletions(-)
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 9c7f3e1..9ba9d79 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -99,15 +99,4 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
#undef TASK_SIZE
#define TASK_SIZE TASK_SIZE32
-#undef cputime_to_timeval
-#define cputime_to_timeval cputime_to_compat_timeval
-static __inline__ void
-cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
-{
- unsigned long jiffies = cputime_to_jiffies(cputime);
-
- value->tv_usec = (jiffies % HZ) * (1000000L / HZ);
- value->tv_sec = jiffies / HZ;
-}
-
#include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index 1ab3432..8a0435a 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -102,15 +102,4 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
#undef TASK_SIZE
#define TASK_SIZE TASK_SIZE32
-#undef cputime_to_timeval
-#define cputime_to_timeval cputime_to_compat_timeval
-static __inline__ void
-cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
-{
- unsigned long jiffies = cputime_to_jiffies(cputime);
-
- value->tv_usec = (jiffies % HZ) * (1000000L / HZ);
- value->tv_sec = jiffies / HZ;
-}
-
#include "../../../fs/binfmt_elf.c"
diff --git a/arch/parisc/kernel/binfmt_elf32.c b/arch/parisc/kernel/binfmt_elf32.c
index 00dc66f..0a28298 100644
--- a/arch/parisc/kernel/binfmt_elf32.c
+++ b/arch/parisc/kernel/binfmt_elf32.c
@@ -91,14 +91,4 @@ struct elf_prpsinfo32
current->thread.map_base = DEFAULT_MAP_BASE32; \
current->thread.task_size = DEFAULT_TASK_SIZE32 \
-#undef cputime_to_timeval
-#define cputime_to_timeval cputime_to_compat_timeval
-static __inline__ void
-cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
-{
- unsigned long jiffies = cputime_to_jiffies(cputime);
- value->tv_usec = (jiffies % HZ) * (1000000L / HZ);
- value->tv_sec = jiffies / HZ;
-}
-
#include "../../../fs/binfmt_elf.c"
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 720a88d..bee791c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1411,8 +1411,6 @@ static void fill_note(struct memelfnote *note, const char *name, int type,
static void fill_prstatus(struct elf_prstatus *prstatus,
struct task_struct *p, long signr)
{
- struct timeval tv;
-
prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
prstatus->pr_sigpend = p->pending.signal.sig[0];
prstatus->pr_sighold = p->blocked.sig[0];
@@ -1423,29 +1421,25 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
prstatus->pr_pgrp = task_pgrp_vnr(p);
prstatus->pr_sid = task_session_vnr(p);
if (thread_group_leader(p)) {
- struct task_cputime_t cputime;
+ struct task_cputime cputime;
/*
* This is the record for the group leader. It shows the
* group-wide total, not its individual thread total.
*/
- thread_group_cputime_t(p, &cputime);
- cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
- cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
+ thread_group_cputime(p, &cputime);
+ prstatus->pr_utime = ns_to_timeval(cputime.utime);
+ prstatus->pr_stime = ns_to_timeval(cputime.stime);
} else {
- cputime_t utime, stime;
+ u64 utime, stime;
- task_cputime_t(p, &utime, &stime);
- cputime_to_timeval(utime, &prstatus->pr_utime);
- cputime_to_timeval(stime, &prstatus->pr_stime);
+ task_cputime(p, &utime, &stime);
+ prstatus->pr_utime = ns_to_timeval(utime);
+ prstatus->pr_stime = ns_to_timeval(stime);
}
- tv = ns_to_timeval(p->signal->cutime);
- prstatus->pr_cutime.tv_sec = tv.tv_sec;
- prstatus->pr_cutime.tv_usec = tv.tv_usec;
- tv = ns_to_timeval(p->signal->cstime);
- prstatus->pr_cstime.tv_sec = tv.tv_sec;
- prstatus->pr_cstime.tv_usec = tv.tv_usec;
+ prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
+ prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
}
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 977b731..dd0476f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1342,21 +1342,21 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
prstatus->pr_pgrp = task_pgrp_vnr(p);
prstatus->pr_sid = task_session_vnr(p);
if (thread_group_leader(p)) {
- struct task_cputime_t cputime;
+ struct task_cputime cputime;
/*
* This is the record for the group leader. It shows the
* group-wide total, not its individual thread total.
*/
- thread_group_cputime_t(p, &cputime);
- cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
- cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
+ thread_group_cputime(p, &cputime);
+ prstatus->pr_utime = ns_to_timeval(cputime.utime);
+ prstatus->pr_stime = ns_to_timeval(cputime.stime);
} else {
- cputime_t utime, stime;
+ u64 utime, stime;
- task_cputime_t(p, &utime, &stime);
- cputime_to_timeval(utime, &prstatus->pr_utime);
- cputime_to_timeval(stime, &prstatus->pr_stime);
+ task_cputime(p, &utime, &stime);
+ prstatus->pr_utime = ns_to_timeval(utime);
+ prstatus->pr_stime = ns_to_timeval(stime);
}
prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 4d24d17..73fa05d 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -52,21 +52,23 @@
#define elf_prpsinfo compat_elf_prpsinfo
/*
- * Compat version of cputime_to_compat_timeval, perhaps this
+ * Compat version of ns_to_timeval, perhaps this
* should be an inline in <linux/compat.h>.
*/
-static void cputime_to_compat_timeval(const cputime_t cputime,
- struct compat_timeval *value)
+static struct compat_timeval ns_to_compat_timeval(const s64 nsec)
{
struct timeval tv;
- cputime_to_timeval(cputime, &tv);
- value->tv_sec = tv.tv_sec;
- value->tv_usec = tv.tv_usec;
+ struct compat_timeval ctv;
+
+ tv = ns_to_timeval(nsec);
+ ctv.tv_sec = tv.tv_sec;
+ ctv.tv_usec = tv.tv_usec;
+
+ return ctv;
}
-#undef cputime_to_timeval
-#define cputime_to_timeval cputime_to_compat_timeval
-
+#undef ns_to_timeval
+#define ns_to_timeval ns_to_compat_timeval
/*
* To use this file, asm/elf.h must define compat_elf_check_arch.
--
2.7.4
Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/sched.h | 2 +-
kernel/acct.c | 9 +++++----
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1f7f6d..75f9f4e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -561,7 +561,7 @@ struct pacct_struct {
int ac_flag;
long ac_exitcode;
unsigned long ac_mem;
- cputime_t ac_utime, ac_stime;
+ u64 ac_utime, ac_stime;
unsigned long ac_minflt, ac_majflt;
};
diff --git a/kernel/acct.c b/kernel/acct.c
index b9b190a8..ca9cb55 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -453,8 +453,8 @@ static void fill_ac(acct_t *ac)
spin_lock_irq(&current->sighand->siglock);
tty = current->signal->tty; /* Safe as we hold the siglock */
ac->ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
- ac->ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
- ac->ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
+ ac->ac_utime = encode_comp_t(nsec_to_AHZ(pacct->ac_utime));
+ ac->ac_stime = encode_comp_t(nsec_to_AHZ(pacct->ac_stime));
ac->ac_flag = pacct->ac_flag;
ac->ac_mem = encode_comp_t(pacct->ac_mem);
ac->ac_minflt = encode_comp_t(pacct->ac_minflt);
@@ -530,7 +530,7 @@ static void do_acct_process(struct bsd_acct_struct *acct)
void acct_collect(long exitcode, int group_dead)
{
struct pacct_struct *pacct = &current->signal->pacct;
- cputime_t utime, stime;
+ u64 utime, stime;
unsigned long vsize = 0;
if (group_dead && current->mm) {
@@ -559,7 +559,8 @@ void acct_collect(long exitcode, int group_dead)
pacct->ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
pacct->ac_flag |= AXSIG;
- task_cputime_t(current, &utime, &stime);
+
+ task_cputime(current, &utime, &stime);
pacct->ac_utime += utime;
pacct->ac_stime += stime;
pacct->ac_minflt += current->min_flt;
--
2.7.4
cputime_t is being obsoleted and replaced by nsec units in order to make
internal timestamps less opaque and more granular.
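The transitional pattern, extracted from the diff below: the stored gtime
switches to u64 nsecs first while its producer still hands in cputime_t,
so the conversion lands on the write side:

	/* Sketch: write-side conversion while utime is still cputime_t. */
	p->utime += cputime;			/* cputime_t accumulation */
	p->gtime += cputime_to_nsecs(cputime);	/* gtime already counts nsecs */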
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
fs/proc/array.c | 6 +++---
include/linux/sched.h | 10 +++++-----
kernel/sched/cputime.c | 8 ++++----
3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 81818ad..2a7b0ed 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -401,7 +401,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
unsigned long cmin_flt = 0, cmaj_flt = 0;
unsigned long min_flt = 0, maj_flt = 0;
cputime_t cutime, cstime, utime, stime;
- cputime_t cgtime, gtime;
+ u64 cgtime, gtime;
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
unsigned long flags;
@@ -541,8 +541,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, " ", task->rt_priority);
seq_put_decimal_ull(m, " ", task->policy);
seq_put_decimal_ull(m, " ", delayacct_blkio_ticks(task));
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(gtime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cgtime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(gtime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cgtime));
if (mm && permitted) {
seq_put_decimal_ull(m, " ", mm->start_data);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7ccdf01..5854e70 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -772,8 +772,8 @@ struct signal_struct {
*/
seqlock_t stats_lock;
cputime_t utime, stime, cutime, cstime;
- cputime_t gtime;
- cputime_t cgtime;
+ u64 gtime;
+ u64 cgtime;
struct prev_cputime prev_cputime;
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
@@ -1644,7 +1644,7 @@ struct task_struct {
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
cputime_t utimescaled, stimescaled;
#endif
- cputime_t gtime;
+ u64 gtime;
struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqcount_t vtime_seqcount;
@@ -2236,7 +2236,7 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime);
-extern cputime_t task_gtime(struct task_struct *t);
+extern u64 task_gtime(struct task_struct *t);
#else
static inline void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime)
@@ -2245,7 +2245,7 @@ static inline void task_cputime(struct task_struct *t,
*stime = t->stime;
}
-static inline cputime_t task_gtime(struct task_struct *t)
+static inline u64 task_gtime(struct task_struct *t)
{
return t->gtime;
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 470c137..647c0fc 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -158,7 +158,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
/* Add guest time to process. */
p->utime += cputime;
account_group_user_time(p, cputime);
- p->gtime += cputime;
+ p->gtime += cputime_to_nsecs(cputime);
/* Add guest time to cpustat. */
if (task_nice(p) > 0) {
@@ -826,10 +826,10 @@ void vtime_init_idle(struct task_struct *t, int cpu)
local_irq_restore(flags);
}
-cputime_t task_gtime(struct task_struct *t)
+u64 task_gtime(struct task_struct *t)
{
unsigned int seq;
- cputime_t gtime;
+ u64 gtime;
if (!vtime_accounting_enabled())
return t->gtime;
@@ -839,7 +839,7 @@ cputime_t task_gtime(struct task_struct *t)
gtime = t->gtime;
if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
- gtime += vtime_delta(t);
+ gtime += cputime_to_nsecs(vtime_delta(t));
} while (read_seqcount_retry(&t->vtime_seqcount, seq));
--
2.7.4
Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/x86/kernel/apm_32.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index b578839..516e6da 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -905,21 +905,21 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
{
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
- static unsigned int last_stime; /* = 0 */
- cputime_t stime, utime;
+ static u64 last_stime; /* = 0 */
+ u64 stime, utime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
unsigned int bucket;
recalc:
- task_cputime_t(current, &utime, &stime);
+ task_cputime(current, &utime, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
} else if (jiffies_since_last_check > idle_period) {
unsigned int idle_percentage;
- idle_percentage = cputime_to_jiffies(stime - last_stime);
+ idle_percentage = nsecs_to_jiffies(stime - last_stime);
idle_percentage *= 100;
idle_percentage /= jiffies_since_last_check;
use_apm_idle = (idle_percentage > idle_threshold);
--
2.7.4
Use the new nsec based cputime accessors as part of the whole cputime
conversion from cputime_t to nsecs.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/alpha/kernel/osf_sys.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 0ccf1d1..488cc9a 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1137,7 +1137,7 @@ struct rusage32 {
SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
{
struct rusage32 r;
- cputime_t utime, stime;
+ u64 utime, stime;
unsigned long utime_jiffies, stime_jiffies;
if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
@@ -1146,9 +1146,9 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
memset(&r, 0, sizeof(r));
switch (who) {
case RUSAGE_SELF:
- task_cputime_t(current, &utime, &stime);
- utime_jiffies = cputime_to_jiffies(utime);
- stime_jiffies = cputime_to_jiffies(stime);
+ task_cputime(current, &utime, &stime);
+ utime_jiffies = nsecs_to_jiffies(utime);
+ stime_jiffies = nsecs_to_jiffies(stime);
jiffies_to_timeval32(utime_jiffies, &r.ru_utime);
jiffies_to_timeval32(stime_jiffies, &r.ru_stime);
r.ru_minflt = current->min_flt;
--
2.7.4
Now that most cputime readers use the transition API, which returns the
task cputime in old-style cputime_t, we can safely store the cputime in
nsecs. This will eventually make cputime statistics less opaque and more
granular. Back and forth conversions between cputime_t and nsecs in order
to deal with cputime_t's random granularity won't be needed anymore.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/alpha/kernel/osf_sys.c | 4 ++--
arch/powerpc/kernel/time.c | 4 ++--
arch/s390/kernel/vtime.c | 6 ++---
arch/x86/kvm/hyperv.c | 5 +++--
fs/binfmt_elf.c | 11 +++++++--
fs/binfmt_elf_fdpic.c | 4 ++--
fs/proc/array.c | 10 ++++-----
include/linux/sched.h | 55 ++++++++++++++++++++++++++++-----------------
kernel/exit.c | 4 ++--
kernel/sched/cputime.c | 35 ++++++++++++++---------------
kernel/signal.c | 4 ++--
kernel/sys.c | 16 ++++++-------
12 files changed, 89 insertions(+), 69 deletions(-)
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index c26d631..0ccf1d1 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1155,8 +1155,8 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
r.ru_majflt = current->maj_flt;
break;
case RUSAGE_CHILDREN:
- utime_jiffies = cputime_to_jiffies(current->signal->cutime);
- stime_jiffies = cputime_to_jiffies(current->signal->cstime);
+ utime_jiffies = nsecs_to_jiffies(current->signal->cutime);
+ stime_jiffies = nsecs_to_jiffies(current->signal->cstime);
jiffies_to_timeval32(utime_jiffies, &r.ru_utime);
jiffies_to_timeval32(stime_jiffies, &r.ru_stime);
r.ru_minflt = current->signal->cmin_flt;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index be9751f..19361fb 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -359,7 +359,7 @@ void vtime_account_system(struct task_struct *tsk)
delta = vtime_delta(tsk, &sys_scaled, &stolen);
account_system_time(tsk, 0, delta);
- tsk->stimescaled += sys_scaled;
+ tsk->stimescaled += cputime_to_nsecs(sys_scaled);
if (stolen)
account_steal_time(stolen);
}
@@ -393,7 +393,7 @@ void vtime_account_user(struct task_struct *tsk)
acct->user_time_scaled = 0;
acct->utime_sspurr = 0;
account_user_time(tsk, utime);
- tsk->utimescaled += utimescaled;
+ tsk->utimescaled += cputime_to_nsecs(utimescaled);
}
#ifdef CONFIG_PPC32
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 1bd5dde..4905e8c 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -138,9 +138,9 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
system_scaled = (system_scaled * mult) / div;
}
account_user_time(tsk, user);
- tsk->utimescaled += user_scaled;
+ tsk->utimescaled += cputime_to_nsecs(user_scaled);
account_system_time(tsk, hardirq_offset, system);
- tsk->stimescaled += system_scaled;
+ tsk->stimescaled += cputime_to_nsecs(system_scaled);
steal = S390_lowcore.steal_timer;
if ((s64) steal > 0) {
@@ -205,7 +205,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
system_scaled = (system_scaled * mult) / div;
}
account_system_time(tsk, 0, system);
- tsk->stimescaled += system_scaled;
+ tsk->stimescaled += cputime_to_nsecs(system_scaled);
virt_timer_forward(system);
}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 42b1c83..7ae7006 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -958,10 +958,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
/* Calculate cpu time spent by current task in 100ns units */
static u64 current_task_runtime_100ns(void)
{
- cputime_t utime, stime;
+ u64 utime, stime;
task_cputime_adjusted(current, &utime, &stime);
- return div_u64(cputime_to_nsecs(utime + stime), 100);
+
+ return div_u64(utime + stime, 100);
}
static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0f62ac5..720a88d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1411,6 +1411,8 @@ static void fill_note(struct memelfnote *note, const char *name, int type,
static void fill_prstatus(struct elf_prstatus *prstatus,
struct task_struct *p, long signr)
{
+ struct timeval tv;
+
prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
prstatus->pr_sigpend = p->pending.signal.sig[0];
prstatus->pr_sighold = p->blocked.sig[0];
@@ -1437,8 +1439,13 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
cputime_to_timeval(utime, &prstatus->pr_utime);
cputime_to_timeval(stime, &prstatus->pr_stime);
}
- cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
- cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
+ tv = ns_to_timeval(p->signal->cutime);
+ prstatus->pr_cutime.tv_sec = tv.tv_sec;
+ prstatus->pr_cutime.tv_usec = tv.tv_usec;
+
+ tv = ns_to_timeval(p->signal->cstime);
+ prstatus->pr_cstime.tv_sec = tv.tv_sec;
+ prstatus->pr_cstime.tv_usec = tv.tv_usec;
}
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 29e175d..977b731 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1358,8 +1358,8 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
cputime_to_timeval(utime, &prstatus->pr_utime);
cputime_to_timeval(stime, &prstatus->pr_stime);
}
- cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
- cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
+ prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
+ prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap;
prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 2a7b0ed..697e424 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -400,7 +400,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
unsigned long long start_time;
unsigned long cmin_flt = 0, cmaj_flt = 0;
unsigned long min_flt = 0, maj_flt = 0;
- cputime_t cutime, cstime, utime, stime;
+ u64 cutime, cstime, utime, stime;
u64 cgtime, gtime;
unsigned long rsslim = 0;
char tcomm[sizeof(task->comm)];
@@ -496,10 +496,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_put_decimal_ull(m, " ", cmin_flt);
seq_put_decimal_ull(m, " ", maj_flt);
seq_put_decimal_ull(m, " ", cmaj_flt);
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime));
- seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime));
- seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(utime));
+ seq_put_decimal_ull(m, " ", nsec_to_clock_t(stime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cutime));
+ seq_put_decimal_ll(m, " ", nsec_to_clock_t(cstime));
seq_put_decimal_ll(m, " ", priority);
seq_put_decimal_ll(m, " ", nice);
seq_put_decimal_ll(m, " ", num_threads);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6cf884d..c1f7f6d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -583,8 +583,8 @@ struct cpu_itimer {
*/
struct prev_cputime {
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- cputime_t utime;
- cputime_t stime;
+ u64 utime;
+ u64 stime;
raw_spinlock_t lock;
#endif
};
@@ -599,8 +599,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev)
/**
* struct task_cputime - collected CPU time counts
- * @utime: time spent in user mode, in &cputime_t units
- * @stime: time spent in kernel mode, in &cputime_t units
+ * @utime: time spent in user mode, in nanoseconds
+ * @stime: time spent in kernel mode, in nanoseconds
* @sum_exec_runtime: total time spent on the CPU, in nanoseconds
*
* This structure groups together three kinds of CPU time that are tracked for
@@ -608,8 +608,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev)
* these counts together and treat all three of them in parallel.
*/
struct task_cputime {
- cputime_t utime;
- cputime_t stime;
+ u64 utime;
+ u64 stime;
unsigned long long sum_exec_runtime;
};
@@ -778,7 +778,7 @@ struct signal_struct {
* in __exit_signal, except for the group leader.
*/
seqlock_t stats_lock;
- cputime_t utime, stime, cutime, cstime;
+ u64 utime, stime, cutime, cstime;
u64 gtime;
u64 cgtime;
struct prev_cputime prev_cputime;
@@ -1647,9 +1647,9 @@ struct task_struct {
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
- cputime_t utime, stime;
+ u64 utime, stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
- cputime_t utimescaled, stimescaled;
+ u64 utimescaled, stimescaled;
#endif
u64 gtime;
struct prev_cputime prev_cputime;
@@ -2242,11 +2242,11 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
- cputime_t *utime, cputime_t *stime);
+ u64 *utime, u64 *stime);
extern u64 task_gtime(struct task_struct *t);
#else
static inline void task_cputime(struct task_struct *t,
- cputime_t *utime, cputime_t *stime)
+ u64 *utime, u64 *stime)
{
*utime = t->utime;
*stime = t->stime;
@@ -2260,16 +2260,16 @@ static inline u64 task_gtime(struct task_struct *t)
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
static inline void task_cputime_scaled(struct task_struct *t,
- cputime_t *utimescaled,
- cputime_t *stimescaled)
+ u64 *utimescaled,
+ u64 *stimescaled)
{
*utimescaled = t->utimescaled;
*stimescaled = t->stimescaled;
}
#else
static inline void task_cputime_scaled(struct task_struct *t,
- cputime_t *utimescaled,
- cputime_t *stimescaled)
+ u64 *utimescaled,
+ u64 *stimescaled)
{
task_cputime(t, utimescaled, stimescaled);
}
@@ -2278,18 +2278,26 @@ static inline void task_cputime_scaled(struct task_struct *t,
static inline void task_cputime_t(struct task_struct *t,
cputime_t *utime, cputime_t *stime)
{
- task_cputime(t, utime, stime);
+ u64 ut, st;
+
+ task_cputime(t, &ut, &st);
+ *utime = nsecs_to_cputime(ut);
+ *stime = nsecs_to_cputime(st);
}
static inline void task_cputime_t_scaled(struct task_struct *t,
cputime_t *utimescaled,
cputime_t *stimescaled)
{
- task_cputime_scaled(t, utimescaled, stimescaled);
+ u64 ut, st;
+
+ task_cputime_scaled(t, &ut, &st);
+ *utimescaled = nsecs_to_cputime(ut);
+ *stimescaled = nsecs_to_cputime(st);
}
-extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
-extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
+extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
/*
* Per process flags
@@ -3493,9 +3501,14 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times);
static inline void thread_group_cputime_t(struct task_struct *tsk,
- struct task_cputime_t *times)
+ struct task_cputime_t *cputime)
{
- thread_group_cputime(tsk, (struct task_cputime *)times);
+ struct task_cputime times;
+
+ thread_group_cputime(tsk, &times);
+ cputime->utime = nsecs_to_cputime(times.utime);
+ cputime->stime = nsecs_to_cputime(times.stime);
+ cputime->sum_exec_runtime = times.sum_exec_runtime;
}
/*
diff --git a/kernel/exit.c b/kernel/exit.c
index 9d68c45..ab84e72 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -85,7 +85,7 @@ static void __exit_signal(struct task_struct *tsk)
bool group_dead = thread_group_leader(tsk);
struct sighand_struct *sighand;
struct tty_struct *uninitialized_var(tty);
- cputime_t utime, stime;
+ u64 utime, stime;
sighand = rcu_dereference_check(tsk->sighand,
lockdep_tasklist_lock_is_held());
@@ -1079,7 +1079,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
struct signal_struct *sig = p->signal;
struct signal_struct *psig = current->signal;
unsigned long maxrss;
- cputime_t tgutime, tgstime;
+ u64 tgutime, tgstime;
/*
* The resource counters for the group leader are in its
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 647c0fc..26cd477 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -134,7 +134,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
int index;
/* Add user time to process. */
- p->utime += cputime;
+ p->utime += cputime_to_nsecs(cputime);
account_group_user_time(p, cputime);
index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
@@ -156,7 +156,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
u64 *cpustat = kcpustat_this_cpu->cpustat;
/* Add guest time to process. */
- p->utime += cputime;
+ p->utime += cputime_to_nsecs(cputime);
account_group_user_time(p, cputime);
p->gtime += cputime_to_nsecs(cputime);
@@ -180,7 +180,7 @@ static inline
void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
{
/* Add system time to process. */
- p->stime += cputime;
+ p->stime += cputime_to_nsecs(cputime);
account_group_system_time(p, cputime);
/* Add system time to cpustat. */
@@ -315,7 +315,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t)
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
struct signal_struct *sig = tsk->signal;
- cputime_t utime, stime;
+ u64 utime, stime;
struct task_struct *t;
unsigned int seq, nextseq;
unsigned long flags;
@@ -467,14 +467,14 @@ void vtime_account_irq_enter(struct task_struct *tsk)
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
-void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
*ut = p->utime;
*st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);
-void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
struct task_cputime cputime;
@@ -545,7 +545,7 @@ void account_idle_ticks(unsigned long ticks)
* Perform (stime * rtime) / total, but avoid multiplication overflow by
* loosing precision when the numbers are big.
*/
-static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
+static u64 scale_stime(u64 stime, u64 rtime, u64 total)
{
u64 scaled;
@@ -582,7 +582,7 @@ static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
* followed by a 64/32->64 divide.
*/
scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
- return (__force cputime_t) scaled;
+ return scaled;
}
/*
@@ -607,14 +607,14 @@ static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
*/
static void cputime_adjust(struct task_cputime *curr,
struct prev_cputime *prev,
- cputime_t *ut, cputime_t *st)
+ u64 *ut, u64 *st)
{
- cputime_t rtime, stime, utime;
+ u64 rtime, stime, utime;
unsigned long flags;
/* Serialize concurrent callers such that we can honour our guarantees */
raw_spin_lock_irqsave(&prev->lock, flags);
- rtime = nsecs_to_cputime(curr->sum_exec_runtime);
+ rtime = curr->sum_exec_runtime;
/*
* This is possible under two circumstances:
@@ -645,8 +645,7 @@ static void cputime_adjust(struct task_cputime *curr,
goto update;
}
- stime = scale_stime((__force u64)stime, (__force u64)rtime,
- (__force u64)(stime + utime));
+ stime = scale_stime(stime, rtime, stime + utime);
update:
/*
@@ -679,7 +678,7 @@ static void cputime_adjust(struct task_cputime *curr,
raw_spin_unlock_irqrestore(&prev->lock, flags);
}
-void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
struct task_cputime cputime = {
.sum_exec_runtime = p->se.sum_exec_runtime,
@@ -690,7 +689,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);
-void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
struct task_cputime cputime;
@@ -851,9 +850,9 @@ u64 task_gtime(struct task_struct *t)
* add up the pending nohz execution time since the last
* cputime snapshot.
*/
-void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
+void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
{
- cputime_t delta;
+ u64 delta;
unsigned int seq;
if (!vtime_accounting_enabled()) {
@@ -872,7 +871,7 @@ void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
continue;
- delta = vtime_delta(t);
+ delta = cputime_to_nsecs(vtime_delta(t));
/*
* Task runs either in user or kernel space, add pending nohz time to
diff --git a/kernel/signal.c b/kernel/signal.c
index db189b4..5d5f6f7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1614,8 +1614,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
rcu_read_unlock();
task_cputime_t(tsk, &utime, &stime);
- info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime);
- info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime);
+ info.si_utime = cputime_to_clock_t(utime + nsecs_to_cputime(tsk->signal->utime));
+ info.si_stime = cputime_to_clock_t(stime + nsecs_to_cputime(tsk->signal->stime));
info.si_status = tsk->exit_code & 0x7f;
if (tsk->exit_code & 0x80)
diff --git a/kernel/sys.c b/kernel/sys.c
index 89d5be4..0dd8031 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -881,15 +881,15 @@ SYSCALL_DEFINE0(getegid)
void do_sys_times(struct tms *tms)
{
- cputime_t tgutime, tgstime, cutime, cstime;
+ u64 tgutime, tgstime, cutime, cstime;
thread_group_cputime_adjusted(current, &tgutime, &tgstime);
cutime = current->signal->cutime;
cstime = current->signal->cstime;
- tms->tms_utime = cputime_to_clock_t(tgutime);
- tms->tms_stime = cputime_to_clock_t(tgstime);
- tms->tms_cutime = cputime_to_clock_t(cutime);
- tms->tms_cstime = cputime_to_clock_t(cstime);
+ tms->tms_utime = nsec_to_clock_t(tgutime);
+ tms->tms_stime = nsec_to_clock_t(tgstime);
+ tms->tms_cutime = nsec_to_clock_t(cutime);
+ tms->tms_cstime = nsec_to_clock_t(cstime);
}
SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
@@ -1543,7 +1543,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
struct task_struct *t;
unsigned long flags;
- cputime_t tgutime, tgstime, utime, stime;
+ u64 tgutime, tgstime, utime, stime;
unsigned long maxrss = 0;
memset((char *)r, 0, sizeof (*r));
@@ -1599,8 +1599,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
unlock_task_sighand(p, &flags);
out:
- cputime_to_timeval(utime, &r->ru_utime);
- cputime_to_timeval(stime, &r->ru_stime);
+ r->ru_utime = ns_to_timeval(utime);
+ r->ru_stime = ns_to_timeval(stime);
if (who != RUSAGE_CHILDREN) {
struct mm_struct *mm = get_task_mm(p);
--
2.7.4
cputime_t is being obsoleted and replaced by nsec units.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
drivers/macintosh/rack-meter.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index c114594..e199fd6 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -52,8 +52,8 @@ struct rackmeter_dma {
struct rackmeter_cpu {
struct delayed_work sniffer;
struct rackmeter *rm;
- cputime64_t prev_wall;
- cputime64_t prev_idle;
+ u64 prev_wall;
+ u64 prev_idle;
int zero;
} ____cacheline_aligned;
@@ -81,7 +81,7 @@ static int rackmeter_ignore_nice;
/* This is copied from cpufreq_ondemand, maybe we should put it in
* a common header somewhere
*/
-static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
+static inline u64 get_cpu_idle_time(unsigned int cpu)
{
u64 retval;
@@ -91,7 +91,7 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
if (rackmeter_ignore_nice)
retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
- return nsecs_to_cputime64(retval);
+ return retval;
}
static void rackmeter_setup_i2s(struct rackmeter *rm)
@@ -217,23 +217,23 @@ static void rackmeter_do_timer(struct work_struct *work)
container_of(work, struct rackmeter_cpu, sniffer.work);
struct rackmeter *rm = rcpu->rm;
unsigned int cpu = smp_processor_id();
- cputime64_t cur_jiffies, total_idle_ticks;
- unsigned int total_ticks, idle_ticks;
+ u64 cur_nsecs, total_idle_nsecs;
+ u64 total_nsecs, idle_nsecs;
int i, offset, load, cumm, pause;
- cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
- total_ticks = (unsigned int) (cur_jiffies - rcpu->prev_wall);
- rcpu->prev_wall = cur_jiffies;
+ cur_nsecs = jiffies64_to_nsecs(get_jiffies_64());
+ total_nsecs = cur_nsecs - rcpu->prev_wall;
+ rcpu->prev_wall = cur_nsecs;
- total_idle_ticks = get_cpu_idle_time(cpu);
- idle_ticks = (unsigned int) (total_idle_ticks - rcpu->prev_idle);
- idle_ticks = min(idle_ticks, total_ticks);
- rcpu->prev_idle = total_idle_ticks;
+ total_idle_nsecs = get_cpu_idle_time(cpu);
+ idle_nsecs = total_idle_nsecs - rcpu->prev_idle;
+ idle_nsecs = min(idle_nsecs, total_nsecs);
+ rcpu->prev_idle = total_idle_nsecs;
/* We do a very dumb calculation to update the LEDs for now,
* we'll do better once we have actual PWM implemented
*/
- load = (9 * (total_ticks - idle_ticks)) / total_ticks;
+ load = div64_u64(9 * (total_nsecs - idle_nsecs), total_nsecs);
offset = cpu << 3;
cumm = 0;
@@ -278,7 +278,7 @@ static void rackmeter_init_cpu_sniffer(struct rackmeter *rm)
continue;
rcpu = &rm->cpu[cpu];
rcpu->prev_idle = get_cpu_idle_time(cpu);
- rcpu->prev_wall = jiffies64_to_cputime64(get_jiffies_64());
+ rcpu->prev_wall = jiffies64_to_nsecs(get_jiffies_64());
schedule_delayed_work_on(cpu, &rm->cpu[cpu].sniffer,
msecs_to_jiffies(CPU_SAMPLING_RATE));
}
--
2.7.4
Kernel cpu stats are stored in cputime_t, which is an architecture-defined
type and hence a bit opaque, requiring accessors and mutators for any
operation.
Converting them to nsecs simplifies the code and is one step toward
the removal of cputime_t in the core code.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/s390/appldata/appldata_os.c | 16 ++++-----
drivers/cpufreq/cpufreq.c | 6 ++--
drivers/cpufreq/cpufreq_governor.c | 2 +-
drivers/cpufreq/cpufreq_stats.c | 1 -
drivers/macintosh/rack-meter.c | 2 +-
fs/proc/stat.c | 68 +++++++++++++++++++-------------------
fs/proc/uptime.c | 7 ++--
kernel/sched/cpuacct.c | 2 +-
kernel/sched/cputime.c | 22 ++++++------
9 files changed, 61 insertions(+), 65 deletions(-)
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 69b23b2..08b9e94 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -113,21 +113,21 @@ static void appldata_get_os_data(void *data)
j = 0;
for_each_online_cpu(i) {
os_data->os_cpu[j].per_cpu_user =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
os_data->os_cpu[j].per_cpu_nice =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
os_data->os_cpu[j].per_cpu_system =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
os_data->os_cpu[j].per_cpu_idle =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
os_data->os_cpu[j].per_cpu_irq =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
os_data->os_cpu[j].per_cpu_softirq =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
os_data->os_cpu[j].per_cpu_iowait =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
os_data->os_cpu[j].per_cpu_steal =
- cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
+ nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
os_data->os_cpu[j].cpu_id = i;
j++;
}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 6e6c1fb..229753d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -132,7 +132,7 @@ static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
u64 cur_wall_time;
u64 busy_time;
- cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+ cur_wall_time = jiffies64_to_nsecs(get_jiffies_64());
busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
@@ -143,9 +143,9 @@ static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
idle_time = cur_wall_time - busy_time;
if (wall)
- *wall = cputime_to_usecs(cur_wall_time);
+ *wall = div_u64(cur_wall_time, NSEC_PER_USEC);
- return cputime_to_usecs(idle_time);
+ return div_u64(idle_time, NSEC_PER_USEC);
}
u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 642dd0f..38d1a82 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -152,7 +152,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
if (ignore_nice) {
u64 cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
- idle_time += cputime_to_usecs(cur_nice - j_cdbs->prev_cpu_nice);
+ idle_time += div_u64(cur_nice - j_cdbs->prev_cpu_nice, NSEC_PER_USEC);
j_cdbs->prev_cpu_nice = cur_nice;
}
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 06d3abd..b084708 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -13,7 +13,6 @@
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/cputime.h>
static DEFINE_SPINLOCK(cpufreq_stats_lock);
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index 7755271..c114594 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -91,7 +91,7 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
if (rackmeter_ignore_nice)
retval += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
- return retval;
+ return nsecs_to_cputime64(retval);
}
static void rackmeter_setup_i2s(struct rackmeter *rm)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index d700c42..44475a4 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -21,23 +21,23 @@
#ifdef arch_idle_time
-static cputime64_t get_idle_time(int cpu)
+static u64 get_idle_time(int cpu)
{
- cputime64_t idle;
+ u64 idle;
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
- idle += arch_idle_time(cpu);
+ idle += cputime_to_nsecs(arch_idle_time(cpu));
return idle;
}
-static cputime64_t get_iowait_time(int cpu)
+static u64 get_iowait_time(int cpu)
{
- cputime64_t iowait;
+ u64 iowait;
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
if (cpu_online(cpu) && nr_iowait_cpu(cpu))
- iowait += arch_idle_time(cpu);
+ iowait += cputime_to_nsecs(arch_idle_time(cpu));
return iowait;
}
@@ -45,32 +45,32 @@ static cputime64_t get_iowait_time(int cpu)
static u64 get_idle_time(int cpu)
{
- u64 idle, idle_time = -1ULL;
+ u64 idle, idle_usecs = -1ULL;
if (cpu_online(cpu))
- idle_time = get_cpu_idle_time_us(cpu, NULL);
+ idle_usecs = get_cpu_idle_time_us(cpu, NULL);
- if (idle_time == -1ULL)
+ if (idle_usecs == -1ULL)
/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
else
- idle = usecs_to_cputime64(idle_time);
+ idle = idle_usecs * NSEC_PER_USEC;
return idle;
}
static u64 get_iowait_time(int cpu)
{
- u64 iowait, iowait_time = -1ULL;
+ u64 iowait, iowait_usecs = -1ULL;
if (cpu_online(cpu))
- iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+ iowait_usecs = get_cpu_iowait_time_us(cpu, NULL);
- if (iowait_time == -1ULL)
+ if (iowait_usecs == -1ULL)
/* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
else
- iowait = usecs_to_cputime64(iowait_time);
+ iowait = iowait_usecs * NSEC_PER_USEC;
return iowait;
}
@@ -115,16 +115,16 @@ static int show_stat(struct seq_file *p, void *v)
}
sum += arch_irq_stat();
- seq_put_decimal_ull(p, "cpu ", cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
seq_putc(p, '\n');
for_each_online_cpu(i) {
@@ -140,16 +140,16 @@ static int show_stat(struct seq_file *p, void *v)
guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(user));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(nice));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(system));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(idle));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(iowait));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(irq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(softirq));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(steal));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest));
- seq_put_decimal_ull(p, " ", cputime64_to_clock_t(guest_nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(system));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest));
+ seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice));
seq_putc(p, '\n');
}
seq_put_decimal_ull(p, "intr ", (unsigned long long)sum);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 33de567..7981c4f 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -5,23 +5,20 @@
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
-#include <linux/cputime.h>
static int uptime_proc_show(struct seq_file *m, void *v)
{
struct timespec uptime;
struct timespec idle;
- u64 idletime;
u64 nsec;
u32 rem;
int i;
- idletime = 0;
+ nsec = 0;
for_each_possible_cpu(i)
- idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
+ nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
get_monotonic_boottime(&uptime);
- nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index bc0b309c..4c88279 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -297,7 +297,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
seq_printf(sf, "%s %lld\n",
cpuacct_stat_desc[stat],
- cputime64_to_clock_t(val[stat]));
+ nsec_to_clock_t(val[stat]));
}
return 0;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 7700a9c..470c137 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -75,9 +75,9 @@ static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
u64 *cpustat = kcpustat_this_cpu->cpustat;
cputime_t irq_cputime;
- irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
+ irq_cputime = nsecs_to_cputime64(irqtime - cpustat[idx]);
irq_cputime = min(irq_cputime, maxtime);
- cpustat[idx] += irq_cputime;
+ cpustat[idx] += cputime_to_nsecs(irq_cputime);
return irq_cputime;
}
@@ -140,7 +140,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
/* Add user time to cpustat. */
- task_group_account_field(p, index, (__force u64) cputime);
+ task_group_account_field(p, index, cputime_to_nsecs(cputime));
/* Account for user time used */
acct_account_cputime(p);
@@ -162,11 +162,11 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime)
/* Add guest time to cpustat. */
if (task_nice(p) > 0) {
- cpustat[CPUTIME_NICE] += (__force u64) cputime;
- cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
+ cpustat[CPUTIME_NICE] += cputime_to_nsecs(cputime);
+ cpustat[CPUTIME_GUEST_NICE] += cputime_to_nsecs(cputime);
} else {
- cpustat[CPUTIME_USER] += (__force u64) cputime;
- cpustat[CPUTIME_GUEST] += (__force u64) cputime;
+ cpustat[CPUTIME_USER] += cputime_to_nsecs(cputime);
+ cpustat[CPUTIME_GUEST] += cputime_to_nsecs(cputime);
}
}
@@ -184,7 +184,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
account_group_system_time(p, cputime);
/* Add system time to cpustat. */
- task_group_account_field(p, index, (__force u64) cputime);
+ task_group_account_field(p, index, cputime_to_nsecs(cputime));
/* Account for system time used */
acct_account_cputime(p);
@@ -224,7 +224,7 @@ void account_steal_time(cputime_t cputime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
- cpustat[CPUTIME_STEAL] += (__force u64) cputime;
+ cpustat[CPUTIME_STEAL] += cputime_to_nsecs(cputime);
}
/*
@@ -237,9 +237,9 @@ void account_idle_time(cputime_t cputime)
struct rq *rq = this_rq();
if (atomic_read(&rq->nr_iowait) > 0)
- cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
+ cpustat[CPUTIME_IOWAIT] += cputime_to_nsecs(cputime);
else
- cpustat[CPUTIME_IDLE] += (__force u64) cputime;
+ cpustat[CPUTIME_IDLE] += cputime_to_nsecs(cputime);
}
/*
--
2.7.4
It's a leftover from removed code.
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/sched.h | 7 -------
1 file changed, 7 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe3ce46..7ccdf01 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -618,13 +618,6 @@ struct task_cputime {
#define prof_exp stime
#define sched_exp sum_exec_runtime
-#define INIT_CPUTIME \
- (struct task_cputime) { \
- .utime = 0, \
- .stime = 0, \
- .sum_exec_runtime = 0, \
- }
-
/*
* This is the atomic variant of task_cputime, which can be used for
* storing and updating task_cputime statistics without locking.
--
2.7.4
NSEC_PER_JIFFY is an ad-hoc redefinition of TICK_NSEC. Let's use the
single, well-maintained version instead.
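For reference, TICK_NSEC is defined in include/linux/jiffies.h (quoting from
memory) as:

    #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)

which is character for character the NSEC_PER_JIFFY definition removed below.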
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Wanpeng Li <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/jiffies.c | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 555e21f..c9fc296 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -27,19 +27,8 @@
#include "timekeeping.h"
-/* The Jiffies based clocksource is the lowest common
- * denominator clock source which should function on
- * all systems. It has the same coarse resolution as
- * the timer interrupt frequency HZ and it suffers
- * inaccuracies caused by missed or lost timer
- * interrupts and the inability for the timer
- * interrupt hardware to accuratly tick at the
- * requested HZ value. It is also not recommended
- * for "tick-less" systems.
- */
-#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ)
-/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
+/* Since jiffies uses a simple TICK_NSEC multiplier
* conversion, the .shift value could be zero. However
* this would make NTP adjustments impossible as they are
* in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
@@ -47,8 +36,8 @@
* amount, and give ntp adjustments in units of 1/2^8
*
* The value 8 is somewhat carefully chosen, as anything
- * larger can result in overflows. NSEC_PER_JIFFY grows as
- * HZ shrinks, so values greater than 8 overflow 32bits when
+ * larger can result in overflows. TICK_NSEC grows as HZ
+ * shrinks, so values greater than 8 overflow 32bits when
* HZ=100.
*/
#if HZ < 34
@@ -64,12 +53,23 @@ static cycle_t jiffies_read(struct clocksource *cs)
return (cycle_t) jiffies;
}
+/*
+ * The Jiffies based clocksource is the lowest common
+ * denominator clock source which should function on
+ * all systems. It has the same coarse resolution as
+ * the timer interrupt frequency HZ and it suffers
+ * inaccuracies caused by missed or lost timer
+ * interrupts and the inability for the timer
+ * interrupt hardware to accuratly tick at the
+ * requested HZ value. It is also not recommended
+ * for "tick-less" systems.
+ */
static struct clocksource clocksource_jiffies = {
.name = "jiffies",
.rating = 1, /* lowest valid rating*/
.read = jiffies_read,
.mask = CLOCKSOURCE_MASK(32),
- .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
+ .mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
.max_cycles = 10,
};
@@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second)
shift_hz += cycles_per_tick/2;
do_div(shift_hz, cycles_per_tick);
/* Calculate nsec_per_tick using shift_hz */
- nsec_per_tick = (u64)NSEC_PER_SEC << 8;
+ nsec_per_tick = (u64)TICK_NSEC << 8;
nsec_per_tick += (u32)shift_hz/2;
do_div(nsec_per_tick, (u32)shift_hz);
--
2.7.4
On Thu, 17 Nov 2016 19:08:07 +0100
Frederic Weisbecker <[email protected]> wrote:
> I'm sorry for the patchbomb, especially as I usually complain about
> these myself but I don't see any way to split this patchset into
> standalone pieces, none of which would make any sense... All I can do
> is to isolate about 3 cleanup patches.
On first glance the patches look ok-ish, but I am not happy about the
direction this takes.
I can understand the wish to consolidate the common code to a single
format which is nano-seconds. It will have repercussions though.
First the obvious problem, it does not compile for s390:
arch/s390/kernel/vtime.c: In function 'do_account_vtime':
arch/s390/kernel/vtime.c:140:25: error: implicit declaration of function
'cputime_to_nsecs' [-Werror=implicit-function-declaration]
account_user_time(tsk, cputime_to_nsecs(user));
^~~~~~~~~~~~~~~~
arch/s390/kernel/idle.c: In function 'enabled_wait':
arch/s390/kernel/idle.c:46:20: error: implicit declaration of function
'cputime_to_nsecs' [-Werror=implicit-function-declaration]
account_idle_time(cputime_to_nsecs(idle_time));
^~~~~~~~~~~~~~~~
arch/s390/kernel/idle.c: In function 'arch_cpu_idle_time':
arch/s390/kernel/idle.c:100:9: error: implicit declaration of function
'cputime_to_nsec' [-Werror=implicit-function-declaration]
return cputime_to_nsec(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
^~~~~~~~~~~~~~~
The error at idle.c:100 is a typo: cputime_to_nsec vs. cputime_to_nsecs.
The other two could probably be solved with an additional include, but the
default cputime_to_nsecs in include/linux/cputime.h is this:
#ifndef cputime_to_nsecs
# define cputime_to_nsecs(__ct) \
(cputime_to_usecs(__ct) * NSEC_PER_USEC)
#endif
which downgrades the accuracy for s390 from better than nano-seconds
to micro-seconds. Not good. For the s390 cputime format you would have
to do
static inline unsigned long long cputime_to_nsecs(const cputime_t cputime)
{
return ((__force unsigned long long) cputime * 1000) >> 12;
}
But this *example* function has an overflow problem.
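One possible overflow-safe variant, sketched here under the assumption that
the generic mul_u64_u32_shr() helper from <linux/math64.h> is usable in this
context, would compute the multiply-and-shift without letting the intermediate
product wrap. The naive version above wraps once the accumulated time exceeds
2^64/1000 TOD units, which is on the order of 50 days of cpu time if I compute
correctly:

#include <linux/math64.h>

/*
 * Sketch only: TOD units are 1/4096 us, so ns = (ct * 1000) >> 12.
 * mul_u64_u32_shr() performs the product in 128 bits (or in split
 * 32-bit halves on 32-bit), so ct * 1000 cannot wrap a u64 here.
 */
static inline unsigned long long cputime_to_nsecs(const cputime_t cputime)
{
	return mul_u64_u32_shr((__force u64) cputime, 1000, 12);
}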
> So currently, cputime_t serves the purpose, for s390 and
> powerpc (on CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y), to avoid converting
> arch clock counters to nanosecs or jiffies while accounting cputime.
The cputime_t has several purposes:
1) Allow for different units in the calculations for virtual cpu time.
There are currently three models: jiffies, nano-seconds and the native
TOD clock format for s390 which is a bit better than nano-seconds.
2) Act as a marker in the common code where a virtual cpu time is used.
This is more important than you might think, unfortunately it is very
easy to confuse a wall-clock delta with cpu time.
3) Avoid expensive operations on the fast path to convert the native cpu
time to something else. Instead move the expensive calculation to the
read-out code, e.g. fs/proc.
Your patches break all three of these purposes. My main gripe is with 3).
> But this comes at the cost of a lot of complexity and uglification
> in the core code to deal with such an opaque type that relies on lots of
> mutators and accessors in order to deal with a random granularity time
> unit that also involve lots of workarounds and likely some performance
> penalties.
Having an opaque type with a set of helper functions is the whole point, no?
And I would not call the generic implementations for jiffies or nano-seconds
complex, these are easy enough to understand. And what are the performance
penalties you are talking about?
> So this patchset proposes to convert most of the cputime_t uses to nsecs.
> In the end it's only used by s390 and powerpc. This all comes at the
> expense of those two archs which then need to perform a cputime_to_nsec()
> conversion everytime they update the cputime to the core. Now I expect
> we can leverage this performance loss with flushing the cputime only on
> ticks so that we accumulate time as cputime_t in between and make the
> conversions more rare.
It is not just one cputime_to_nsec that we would have to add but several.
Three in do_account_vtime and one in vtime_account_irq_enter.
The do_account_vtime function is called once per jiffy and once per task
switch. HZ is usually set to 100 for s390, the conversion once per jiffy
would not be so bad, but the call on the scheduling path *will* hurt.
What is even worse is the vtime_account_irq_enter path, which is called
several times for each *interrupt*: at least two times for an interrupt without
additional processing and four times if a softirq is triggered.
Now it has been proposed to implement lazy accounting to accumulate deltas
and do the expensive conversions only infrequently. This is pretty straight-
forward for account_user_time but to do this for the account_system_time
function is more complicated. The function has to differentiate between
guest/hardirq/softirq and pure system time. We would need to keep sums for
each bucket and provide a separate function to add to each bucket. Like
account_guest_time(), account_hardirq_time(), account_softirq_time() and
account_system_time(). Then it is up to the arch code to sort out the details
and call the accounting code once per jiffy for each of the buckets.
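As a rough sketch, such per-bucket accumulation could look like the following
(all names are illustrative only; account_guest_time() is currently static to
kernel/sched/cputime.c and account_hardirq_time()/account_softirq_time() do
not exist yet):

struct vtime_buckets {
	cputime_t user;
	cputime_t guest;
	cputime_t system;
	cputime_t hardirq;
	cputime_t softirq;
};

/* Called once per jiffy by the arch code; deltas are summed in the
 * native cputime_t and converted to nsecs only at flush time. */
static void vtime_flush_buckets(struct task_struct *tsk,
				struct vtime_buckets *b)
{
	if (b->user)
		account_user_time(tsk, cputime_to_nsecs(b->user));
	if (b->guest)
		account_guest_time(tsk, cputime_to_nsecs(b->guest));
	if (b->system)
		account_system_time(tsk, HARDIRQ_OFFSET,
				    cputime_to_nsecs(b->system));
	if (b->hardirq)
		account_hardirq_time(tsk, cputime_to_nsecs(b->hardirq));
	if (b->softirq)
		account_softirq_time(tsk, cputime_to_nsecs(b->softirq));
	memset(b, 0, sizeof(*b));
}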
We still have to do the whole thing on each task switch though.
But I am still not happy about the approach. What is the compelling reason
for this change except for the "but it looks ugly"?
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.
On Fri, Nov 18, 2016 at 01:08:46PM +0100, Martin Schwidefsky wrote:
> On Thu, 17 Nov 2016 19:08:07 +0100
> Frederic Weisbecker <[email protected]> wrote:
>
> > I'm sorry for the patchbomb, especially as I usually complain about
> > these myself but I don't see any way to split this patchset into
> > standalone pieces, none of which would make any sense... All I can do
> > is to isolate about 3 cleanup patches.
>
> On first glance the patches look ok-ish, but I am not happy about the
> direction this takes.
>
> I can understand the wish to consolidate the common code to a single
> format which is nano-seconds. It will have repercussions though.
>
> First the obvious problem, it does not compile for s390:
>
> arch/s390/kernel/vtime.c: In function 'do_account_vtime':
> arch/s390/kernel/vtime.c:140:25: error: implicit declaration of function
> 'cputime_to_nsecs' [-Werror=implicit-function-declaration]
> account_user_time(tsk, cputime_to_nsecs(user));
> ^~~~~~~~~~~~~~~~
> arch/s390/kernel/idle.c: In function 'enabled_wait':
> arch/s390/kernel/idle.c:46:20: error: implicit declaration of function
> 'cputime_to_nsecs' [-Werror=implicit-function-declaration]
> account_idle_time(cputime_to_nsecs(idle_time));
> ^~~~~~~~~~~~~~~~
> arch/s390/kernel/idle.c: In function 'arch_cpu_idle_time':
> arch/s390/kernel/idle.c:100:9: error: implicit declaration of function
> 'cputime_to_nsec' [-Werror=implicit-function-declaration]
> return cputime_to_nsec(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
> ^~~~~~~~~~~~~~~
Yes, sorry, I haven't yet done much build-testing. I should have written that it's
not build-tested yet. This patchset in its current state is rather an RFC.
> The error at idle.c:100 is a typo: cputime_to_nsec vs. cputime_to_nsecs.
> The other two could probably be solved with an additional include, but the
> default cputime_to_nsecs in include/linux/cputime.h is this:
>
> #ifndef cputime_to_nsecs
> # define cputime_to_nsecs(__ct) \
> (cputime_to_usecs(__ct) * NSEC_PER_USEC)
> #endif
>
> which downgrades the accuracy for s390 from better than nano-seconds
> to micro-seconds. Not good. For the s390 cputime format you would have
> to do
>
> static inline unsigned long long cputime_to_nsecs(const cputime_t cputime)
> {
> return ((__force unsigned long long) cputime * 1000) >> 12;
> }
I agree, that loss of accuracy is my biggest worry. Hence the accumulation
idea, but more about that later.
>
> But this *example* function has an overflow problem.
>
> > So currently, cputime_t serves the purpose, for s390 and
> > powerpc (on CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y), to avoid converting
> > arch clock counters to nanosecs or jiffies while accounting cputime.
>
> The cputime_t has several purposes:
> 1) Allow for different units in the calculations for virtual cpu time.
> There are currently three models: jiffies, nano-seconds and the native
> TOD clock format for s390 which is a bit better than nano-seconds.
Sure, I don't disagree with that, just with the way it is done (i.e. stored
and maintained in the core in this very obscure type).
> 2) Act as a marker in the common code where a virtual cpu time is used.
> This is more important than you might think, unfortunately it is very
> easy to confuse a wall-clock delta with cpu time.
There you lost me, I don't see which confusion you're pointing at.
> 3) Avoid expensive operations on the fast path to convert the native cpu
> time to something else. Instead move the expensive calculation to the
> read-out code, e.g. fs/proc.
>
> Your patches break all three of these purposes. My main gripe is with 3).
>
> > But this comes at the cost of a lot of complexity and uglification
> > in the core code to deal with such an opaque type that relies on lots of
> > mutators and accessors in order to deal with a random granularity time
> > unit that also involve lots of workarounds and likely some performance
> > penalties.
>
> Having an opaque type with a set of helper functions is the whole point, no?
> And I would not call the generic implementations for jiffies or nano-seconds
> complex, these are easy enough to understand. And what are the performance
> penalties you are talking about?
Just because some code isn't too complex doesn't mean we really want to keep it.
I get regular questions about what unit cputime_t maps to on a given
configuration. Everybody gets confused about that. In many of the
patches we got on cputime over the last years, I had to fix quite a few issues
with bad granularity assumptions. In fact most fixes that came to kernel/sched/cputime.c
recently, after merge or review, were about people getting confused by the cputime_t granularity.
Especially for stats that come from nsec-based clocks (steal and irqtime), we always have to maintain an
accumulator and make sure we don't lose sub-unit nanosec deltas.
And we have to maintain several workarounds, sometimes even in the fastpath, in
order to cope with the random cputime_t granularity all over.
Some fastpath examples:
* steal time accounting (needs to convert nsecs to cputime then back, see the
  sketch below)
* irqtime accounting (needs to maintain accumulators)
* cputime_adjust(), used on any user read of cputime (needs to convert from
  nsecs to cputime)
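For illustration, this is roughly what the steal time fastpath has to do
today (simplified from kernel/sched/cputime.c, from memory, so details may
differ):

static cputime_t steal_account_process_time(cputime_t maxtime)
{
	cputime_t steal_cputime;
	u64 steal;

	/* The paravirt steal clock is in nsecs... */
	steal = paravirt_steal_clock(smp_processor_id());
	steal -= this_rq()->prev_steal_time;

	/* ...so it is converted to cputime_t for accounting... */
	steal_cputime = min(nsecs_to_cputime(steal), maxtime);
	account_steal_time(steal_cputime);

	/* ...then back to nsecs to record what was consumed. */
	this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);

	return steal_cputime;
}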
But the worst really is the maintenance. This patchset removes around 600 lines.
>
> > So this patchset proposes to convert most of the cputime_t uses to nsecs.
> > In the end it's only used by s390 and powerpc. This all comes at the
> > expense of those two archs which then need to perform a cputime_to_nsec()
> > conversion everytime they update the cputime to the core. Now I expect
> > we can leverage this performance loss with flushing the cputime only on
> > ticks so that we accumulate time as cputime_t in between and make the
> > conversions more rare.
>
> It is not just one cputime_to_nsec that we would have to add but several.
> Three in do_account_vtime and one in vtime_account_irq_enter.
>
> The do_account_vtime function is called once per jiffy and once per task
> switch. HZ is usually set to 100 for s390, the conversion once per jiffy
> would not be so bad, but the call on the scheduling path *will* hurt.
I don't think we need to flush on task switch. If we maintain the accumulators
on the task/thread struct instead of per-cpu, then the remaining time after
task switch out will be accounted on the next tick after the next task switch in.
> What is even worse is the vtime_account_irq_enter path, which is called several
> times for each *interrupt*, at least two times for an interrupt without
> additional processing and four times if a softirq is triggered.
Actually maintaining an accumulator to flush on ticks is probably going to improve
performance because of that. account_system_time() is called twice per interrupt, and
such a function does much more than just account the time to the task_struct and cpustat
fields. The same applies to userspace boundaries and context switches. The account_*_time()
functions can be expensive.
>
> Now it has been proposed to implement lazy accounting to accumulate deltas
> and do the expensive conversions only infrequently. This is pretty straight-
> forward for account_user_time but to do this for the account_system_time
> function is more complicated. The function has to differentiate between
> guest/hardirq/softirq and pure system time. We would need to keep sums for
> each bucket and provide a separate function to add to each bucket. Like
> account_guest_time(), account_hardirq_time(), account_softirq_time() and
> account_system_time(). Then it is up to the arch code to sort out the details
> and call the accounting code once per jiffy for each of the buckets.
That wouldn't be too hard really. The s390 code in vtime.c already does that.
> We still have to do the whole thing on each task switch though.
Not if we maintain the deltas in the task_struct.
>
> But I am still not happy about the approach. What is the compelling reason
> for this change except for the "but it looks ugly"?
The diffstat (600 lines removed). Also the fact that we have all these workarounds in the
core code just for the special case of 1 arch (s390) and a half (powerpc with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE).
I'd much rather have all that complexity moved into a vtime_native.c shared by s390 and powerpc
that takes care of proper accumulation in cputime_t and flushes it on ticks in nsecs, rather
than having all these cputime_t games all over the kernel.
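
Something along these lines (hand-waving sketch with invented names, and
assuming the nsec-based account_*_time() signatures from this series; the
real thing would have to cover all the buckets):

	/* Hypothetical vtime_native.c: accumulate in native cputime_t
	 * between ticks, convert to nsecs only once per tick on flush. */
	struct vtime_acc {
		cputime_t utime;
		cputime_t stime;
	};

	static void vtime_flush(struct task_struct *tsk, struct vtime_acc *acc)
	{
		if (acc->utime)
			account_user_time(tsk, cputime_to_nsecs(acc->utime));
		if (acc->stime)
			account_system_time(tsk, HARDIRQ_OFFSET,
					    cputime_to_nsecs(acc->stime));
		acc->utime = acc->stime = 0;
	}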
On Fri, 18 Nov 2016 15:47:02 +0100
Frederic Weisbecker <[email protected]> wrote:
> On Fri, Nov 18, 2016 at 01:08:46PM +0100, Martin Schwidefsky wrote:
> > On Thu, 17 Nov 2016 19:08:07 +0100
> > Frederic Weisbecker <[email protected]> wrote:
> >
> > > I'm sorry for the patchbomb, especially as I usually complain about
> > > these myself but I don't see any way to split this patchset into
> > > standalone pieces, none of which would make any sense... All I can do
> > > is to isolate about 3 cleanup patches.
> >
> > On first glance the patches look ok-ish, but I am not happy about the
> > direction this takes.
> >
> > I can understand the wish to consolidate the common code to a single
> > format which is nano-seconds. It will have repercussions though.
> >
> > First the obvious problem, it does not compile for s390:
> >
> > arch/s390/kernel/vtime.c: In function 'do_account_vtime':
> > arch/s390/kernel/vtime.c:140:25: error: implicit declaration of function
> > 'cputime_to_nsecs' [-Werror=implicit-function-declaration]
> > account_user_time(tsk, cputime_to_nsecs(user));
> > ^~~~~~~~~~~~~~~~
> > arch/s390/kernel/idle.c: In function 'enabled_wait':
> > arch/s390/kernel/idle.c:46:20: error: implicit declaration of function
> > 'cputime_to_nsecs' [-Werror=implicit-function-declaration]
> > account_idle_time(cputime_to_nsecs(idle_time));
> > ^~~~~~~~~~~~~~~~
> > arch/s390/kernel/idle.c: In function 'arch_cpu_idle_time':
> > arch/s390/kernel/idle.c:100:9: error: implicit declaration of function
> > 'cputime_to_nsec' [-Werror=implicit-function-declaration]
> > return cputime_to_nsec(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
> > ^~~~~~~~~~~~~~~
>
> Yes, sorry, I haven't yet done much build-testing. I should have written that it's
> not build-tested yet. This patchset in its current state is rather an RFC.
No big deal, I got it to compile with a small change.
> > The error at idle.c:100 is a typo cputime_to_nsec vs cputime_to_nsecs.
> > The other two could probably be solved with an additional include but the
> > default cputime_to_nsecs is in include/linux/cputime.h is this:
> >
> > #ifndef cputime_to_nsecs
> > # define cputime_to_nsecs(__ct) \
> > (cputime_to_usecs(__ct) * NSEC_PER_USEC)
> > #endif
> >
> > which downgrades the accuracy for s390 from better than nano-seconds
> > to micro-seconds. Not good. For the s390 cputime format you would have
> > to do
> >
> > static inline unsigned long long cputime_to_nsecs(const cputime_t cputime)
> > {
> > return ((__force unsigned long long) cputime * 1000) >> 12;
> > }
>
> I agree, that loss of accuracy is my biggest worry. Hence the accumulation
> idea, but more about that later.
We cannot allow that to happen, but the accumulation should take care of it.
> >
> > But this *example* function has an overflow problem.
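
The overflow is easy enough to avoid by splitting the multiply, e.g.
(untested sketch):

	static inline unsigned long long cputime_to_nsecs(const cputime_t cputime)
	{
		unsigned long long ct = (__force unsigned long long) cputime;

		/* ct * 1000 >> 12 without overflowing 64 bits:
		 * (ct >> 12) * 1000 + ((ct & 0xfff) * 1000 >> 12) */
		return ((ct >> 12) * 1000) + (((ct & 0xfff) * 1000) >> 12);
	}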
> >
> > > So currently, cputime_t serves the purpose, for s390 and
> > > powerpc (on CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y), to avoid converting
> > > arch clock counters to nanosecs or jiffies while accounting cputime.
> >
> > The cputime_t has several purposes:
> > 1) Allow for different units in the calculations for virtual cpu time.
> > There are currently three models: jiffies, nano-seconds and the native
> > TOD clock format for s390 which is a bit better than nano-seconds.
>
> Sure, I don't disagree with that, just with the way it is done (i.e. stored
> and maintained in the core with this very obscure type).
>
> > 2) Act as a marker in the common code where a virtual cpu time is used.
> > This is more important than you might think, unfortunately it is very
> > easy to confuse a wall-clock delta with cpu time.
>
> There you lost me, I don't get which confusion you're pointing at.
The confusion stems from the fact that you do *not* have a simple nano-second
value but a modal value that depends on the architecture. More below...
> > 3) Avoid expensive operations on the fast path to convert the native cpu
> > time to something else. Instead move the expensive calculation to the
> > read-out code, e.g. fs/proc.
> >
> > Your patches break all three of these purposes. My main gripe is with 3).
> >
> > > But this comes at the cost of a lot of complexity and uglification
> > > in the core code to deal with such an opaque type that relies on lots of
> > > mutators and accessors in order to deal with a random granularity time
> > > unit that also involve lots of workarounds and likely some performance
> > > penalties.
> >
> > Having an opaque type with a set of helper functions is the whole point, no?
> > And I would not call the generic implementations for jiffies or nano-seconds
> > complex, these are easy enough to understand. And what are the performance
> > penalties you are talking about?
>
> Just because some code isn't too complex doesn't mean we really want to keep it.
> I get regular questions about what unit cputime_t maps to on a given
> configuration. Everybody gets confused about that. On many of the
> patches we got on cputime over the last years, I had to fix quite a few issues
> with bad granularity assumptions. In fact most fixes that came to kernel/sched/cputime.c
> recently, after merge or review, were about people getting confused with cputime_t granularity.
These regular questions you get about cputime_t are exactly what I was referring
to. If the value were just a u64, the guys asking the question about cputime_t
would just assume the value to be nano-seconds and then go ahead and break things.
> Especially for stats that come from nsecs clocks (steal and irqtime), we always have to maintain an
> accumulator and make sure we don't lose some nanosec deltas.
Yes, for the CONFIG_IRQ_TIME_ACCOUNTING=y case.
> And we have to maintain several workarounds, sometimes even in the fastpath in
> order to cope with the cputime_t random granularity all over.
>
> Some fastpath examples:
>
> * steal time accounting (need to convert nsecs to cputime then back)
> * irqtime accounting (maintain accumulators)
> * cputime_adjust, used on any user read of cputime (need to convert from nsecs
> to cputime on cputime_adjust)
>
> But the worst really is the maintenance. This patchset removes around 600 lines.
Well, 300 lines are from the powerpc and s390 cputime.h headers and ~200 from
the generic cputime_jiffies.h and cputime_nsecs.h.
> >
> > > So this patchset proposes to convert most of the cputime_t uses to nsecs.
> > > In the end it's only used by s390 and powerpc. This all comes at the
> > > expense of those two archs which then need to perform a cputime_to_nsec()
> > > conversion everytime they update the cputime to the core. Now I expect
> > > we can leverage this performance loss with flushing the cputime only on
> > > ticks so that we accumulate time as cputime_t in between and make the
> > > conversions more rare.
> >
> > It is not just one cputime_to_nsec that we would have to add but several.
> > Three in do_account_vtime and one in vtime_account_irq_enter.
> >
> > The do_account_vtime function is called once per jiffy and once per task
> > switch. HZ is usually set to 100 for s390, the conversion once per jiffy
> > would not be so bad, but the call on the scheduling path *will* hurt.
>
> I don't think we need to flush on task switch. If we maintain the accumulators
> on the task/thread struct instead of per-cpu, then the remaining time after
> task switch out will be accounted on the next tick after the next task switch in.
You cannot properly calculate steal time if you allow sleeping tasks to sit on
up to 5*HZ worth of cpu time. I think we *have* to do accounting on task switch.
At least on s390, likely on powerpc as well. Why not make that an option for
the architecture with the yet-to-be-written accumulating code?
> > What is even worse is the vtime_account_irq_enter path, which is called several
> > times for each *interrupt*, at least two times for an interrupt without
> > additional processing and four times if a softirq is triggered.
>
> Actually maintaining an accumulator to flush on ticks is probably going to improve
> performance because of that. account_system_time() is called twice per interrupt, and
> such a function does much more than just account the time to the task_struct and cpustat
> fields. The same applies to userspace boundaries and context switches. The account_*_time()
> functions can be expensive.
The account_system_time twice per interrupt can be removed with the accumulation
idea. We will have to see how expensive the account_xxx_time calls are on
the context switch path.
> >
> > Now it has been proposed to implement lazy accounting to accumulate deltas
> > and do the expensive conversions only infrequently. This is pretty straight-
> > forward for account_user_time but to do this for the account_system_time
> > function is more complicated. The function has to differentiate between
> > guest/hardirq/softirq and pure system time. We would need to keep sums for
> > each bucket and provide a separate function to add to each bucket. Like
> > account_guest_time(), account_hardirq_time(), account_softirq_time() and
> > account_system_time(). Then it is up to the arch code to sort out the details
> > and call the accounting code once per jiffy for each of the buckets.
>
> That wouldn't be too hard really. The s390 code in vtime.c already does that.
Yes, I agree that the accumulating change would not be too hard. Can I make the
request that we try to get that done first before doing the cleanup?
> > We still have to do the whole thing on each task switch though.
>
> Not if we maintain the deltas in the task_struct.
>
> >
> > But I am still not happy about the approach. What is the compelling reason
> > for this change except for the "but it looks ugly"?
>
> The diffstat (600 lines removed). Also the fact that we have all these workarounds
> in the core code just for the special case of 1 arch (s390) and a half
> (powerpc with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE).
>
> I'd much rather have all that complexity moved into a vtime_native.c shared by s390 and powerpc
> that takes care of proper accumulation in cputime_t and flushes it on ticks in nsecs, rather
> than having all these cputime_t games all over the kernel.
The goal to have nano-seconds only in the core code is a good one. And with the
accumulator I think s390 can live with it. The change would have a real upside
too. There are these stupid divisions for scaled cputime that we have to calculate
for every call to account_xxx_time(). These would not be done for the interrupts
anymore.
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.
* Frederic Weisbecker <[email protected]> wrote:
> > But I am still not happy about the approach. What is the compelling reason for
> > this change except for the "but it looks ugly"?
>
> The diffstat (600 lines removed). Also the fact that we have all these
> workarounds in the core code just for the special case of 1 arch (s390) and a
> half (powerpc with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE).
>
> I'd much rather have all that complexity moved into a vtime_native.c shared by
> s390 and powerpc that takes care of proper accumulation in cputime_t and flushes
> it on ticks in nsecs, rather than having all these cputime_t games all over the
> kernel.
I agree - we really want to concentrate complexity in such a fashion and generally
standardize on nanosecs, and the diffstat of the patchset is really nice.
The patchset obviously has to build (and work!) on s390/powerpc properly.
Thanks,
Ingo
On Mon, 21 Nov 2016 07:59:56 +0100
Martin Schwidefsky <[email protected]> wrote:
> On Fri, 18 Nov 2016 15:47:02 +0100
> Frederic Weisbecker <[email protected]> wrote:
>
> > On Fri, Nov 18, 2016 at 01:08:46PM +0100, Martin Schwidefsky wrote:
> > > On Thu, 17 Nov 2016 19:08:07 +0100
> > > Frederic Weisbecker <[email protected]> wrote:
> > >
> > > Now it has been proposed to implement lazy accounting to accumulate deltas
> > > and do the expensive conversions only infrequently. This is pretty straight-
> > > forward for account_user_time but to do this for the account_system_time
> > > function is more complicated. The function has to differentiate between
> > > guest/hardirq/softirq and pure system time. We would need to keep sums for
> > > each bucket and provide a separate function to add to each bucket. Like
> > > account_guest_time(), account_hardirq_time(), account_softirq_time() and
> > > account_system_time(). Then it is up to the arch code to sort out the details
> > > and call the accounting code once per jiffy for each of the buckets.
> >
> > That wouldn't be too hard really. The s390 code in vtime.c already does that.
>
> Yes, I agree that the accumulating change would not be too hard. Can I make the
> request that we try to get that done first before doing the cleanup?
Played with the idea a bit, here is a prototype patch to do the delayed system time
accounting for s390. It applies against the latest s390 features tree, which you'll
find here:
git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
The details probably need some more work, but it works.
--
From 1b5ef9ddf899da81a48de826f783b15e6fc45d25 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <[email protected]>
Date: Mon, 21 Nov 2016 10:44:10 +0100
Subject: [PATCH] s390/cputime: delayed accounting of system time
The account_system_time() function is called with a cputime that
occurred while running in the kernel. The function detects which
context the CPU is currently running in and accounts the time to
the correct bucket. This forces the arch code to account the
cputime for hardirq and softirq immediately.
Make account_guest_time non-static and add account_sys_time,
account_hardirq_time and account_softirq_time. With these functions
the arch code can delay the accounting for system time. For s390
the accounting is done once per timer tick and for each task switch.
Signed-off-by: Martin Schwidefsky <[email protected]>
---
arch/s390/include/asm/lowcore.h | 65 ++++++++++++-----------
arch/s390/include/asm/processor.h | 3 ++
arch/s390/kernel/vtime.c | 106 ++++++++++++++++++++++----------------
include/linux/kernel_stat.h | 13 +++--
kernel/sched/cputime.c | 22 +++++++-
5 files changed, 129 insertions(+), 80 deletions(-)
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 62a5cf1..8a5b082 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -85,53 +85,56 @@ struct lowcore {
__u64 mcck_enter_timer; /* 0x02c0 */
__u64 exit_timer; /* 0x02c8 */
__u64 user_timer; /* 0x02d0 */
- __u64 system_timer; /* 0x02d8 */
- __u64 steal_timer; /* 0x02e0 */
- __u64 last_update_timer; /* 0x02e8 */
- __u64 last_update_clock; /* 0x02f0 */
- __u64 int_clock; /* 0x02f8 */
- __u64 mcck_clock; /* 0x0300 */
- __u64 clock_comparator; /* 0x0308 */
+ __u64 guest_timer; /* 0x02d8 */
+ __u64 system_timer; /* 0x02e0 */
+ __u64 hardirq_timer; /* 0x02e8 */
+ __u64 softirq_timer; /* 0x02f0 */
+ __u64 steal_timer; /* 0x02f8 */
+ __u64 last_update_timer; /* 0x0300 */
+ __u64 last_update_clock; /* 0x0308 */
+ __u64 int_clock; /* 0x0310 */
+ __u64 mcck_clock; /* 0x0318 */
+ __u64 clock_comparator; /* 0x0320 */
/* Current process. */
- __u64 current_task; /* 0x0310 */
- __u8 pad_0x318[0x320-0x318]; /* 0x0318 */
- __u64 kernel_stack; /* 0x0320 */
+ __u64 current_task; /* 0x0328 */
+ __u8 pad_0x318[0x320-0x318]; /* 0x0330 */
+ __u64 kernel_stack; /* 0x0338 */
/* Interrupt, panic and restart stack. */
- __u64 async_stack; /* 0x0328 */
- __u64 panic_stack; /* 0x0330 */
- __u64 restart_stack; /* 0x0338 */
+ __u64 async_stack; /* 0x0340 */
+ __u64 panic_stack; /* 0x0348 */
+ __u64 restart_stack; /* 0x0350 */
/* Restart function and parameter. */
- __u64 restart_fn; /* 0x0340 */
- __u64 restart_data; /* 0x0348 */
- __u64 restart_source; /* 0x0350 */
+ __u64 restart_fn; /* 0x0358 */
+ __u64 restart_data; /* 0x0360 */
+ __u64 restart_source; /* 0x0368 */
/* Address space pointer. */
- __u64 kernel_asce; /* 0x0358 */
- __u64 user_asce; /* 0x0360 */
+ __u64 kernel_asce; /* 0x0370 */
+ __u64 user_asce; /* 0x0378 */
/*
* The lpp and current_pid fields form a
* 64-bit value that is set as program
* parameter with the LPP instruction.
*/
- __u32 lpp; /* 0x0368 */
- __u32 current_pid; /* 0x036c */
+ __u32 lpp; /* 0x0380 */
+ __u32 current_pid; /* 0x0384 */
/* SMP info area */
- __u32 cpu_nr; /* 0x0370 */
- __u32 softirq_pending; /* 0x0374 */
- __u64 percpu_offset; /* 0x0378 */
- __u64 vdso_per_cpu_data; /* 0x0380 */
- __u64 machine_flags; /* 0x0388 */
- __u32 preempt_count; /* 0x0390 */
- __u8 pad_0x0394[0x0398-0x0394]; /* 0x0394 */
- __u64 gmap; /* 0x0398 */
- __u32 spinlock_lockval; /* 0x03a0 */
- __u32 fpu_flags; /* 0x03a4 */
- __u8 pad_0x03a8[0x0400-0x03a8]; /* 0x03a8 */
+ __u32 cpu_nr; /* 0x0388 */
+ __u32 softirq_pending; /* 0x038c */
+ __u64 percpu_offset; /* 0x0390 */
+ __u64 vdso_per_cpu_data; /* 0x0398 */
+ __u64 machine_flags; /* 0x03a0 */
+ __u32 preempt_count; /* 0x03a8 */
+ __u8 pad_0x03ac[0x03b0-0x03ac]; /* 0x03ac */
+ __u64 gmap; /* 0x03b0 */
+ __u32 spinlock_lockval; /* 0x03b8 */
+ __u32 fpu_flags; /* 0x03bc */
+ __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */
/* Per cpu primary space access list */
__u32 paste[16]; /* 0x0400 */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index bf8b2e2..0234eea 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -111,7 +111,10 @@ struct thread_struct {
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
unsigned long user_timer; /* task cputime in user space */
+ unsigned long guest_timer; /* task cputime in kvm guest */
unsigned long system_timer; /* task cputime in kernel space */
+ unsigned long hardirq_timer; /* task cputime in hardirq context */
+ unsigned long softirq_timer; /* task cputime in softirq context */
unsigned long sys_call_table; /* system call table address */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9a6c957..fa262c2 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -90,14 +90,23 @@ static void update_mt_scaling(void)
__this_cpu_write(mt_scaling_jiffies, jiffies_64);
}
+static inline u64 scale_vtime(u64 vtime)
+{
+ u64 mult = __this_cpu_read(mt_scaling_mult);
+ u64 div = __this_cpu_read(mt_scaling_div);
+
+ if (smp_cpu_mtid)
+ return vtime * mult / div;
+ return vtime;
+}
+
/*
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
{
- u64 timer, clock, user, system, steal;
- u64 user_scaled, system_scaled;
+ u64 timer, clock, user, guest, system, hardirq, softirq, steal;
timer = S390_lowcore.last_update_timer;
clock = S390_lowcore.last_update_clock;
@@ -110,34 +119,48 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
#endif
: "=m" (S390_lowcore.last_update_timer),
"=m" (S390_lowcore.last_update_clock));
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
- S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
+ clock = S390_lowcore.last_update_clock - clock;
+ timer -= S390_lowcore.last_update_timer;
+
+ if ((tsk->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0))
+ S390_lowcore.guest_timer += timer;
+ else if (hardirq_count() - hardirq_offset)
+ S390_lowcore.hardirq_timer += timer;
+ else if (in_serving_softirq())
+ S390_lowcore.softirq_timer += timer;
+ else
+ S390_lowcore.system_timer += timer;
/* Update MT utilization calculation */
if (smp_cpu_mtid &&
time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
update_mt_scaling();
+ /* Calculate cputime delta */
user = S390_lowcore.user_timer - tsk->thread.user_timer;
- S390_lowcore.steal_timer -= user;
tsk->thread.user_timer = S390_lowcore.user_timer;
-
+ guest = S390_lowcore.guest_timer - tsk->thread.guest_timer;
+ tsk->thread.guest_timer = S390_lowcore.guest_timer;
system = S390_lowcore.system_timer - tsk->thread.system_timer;
- S390_lowcore.steal_timer -= system;
tsk->thread.system_timer = S390_lowcore.system_timer;
-
- user_scaled = user;
- system_scaled = system;
- /* Do MT utilization scaling */
- if (smp_cpu_mtid) {
- u64 mult = __this_cpu_read(mt_scaling_mult);
- u64 div = __this_cpu_read(mt_scaling_div);
-
- user_scaled = (user_scaled * mult) / div;
- system_scaled = (system_scaled * mult) / div;
- }
- account_user_time(tsk, user, user_scaled);
- account_system_time(tsk, hardirq_offset, system, system_scaled);
+ hardirq = S390_lowcore.hardirq_timer - tsk->thread.hardirq_timer;
+ tsk->thread.hardirq_timer = S390_lowcore.hardirq_timer;
+ softirq = S390_lowcore.softirq_timer - tsk->thread.softirq_timer;
+ tsk->thread.softirq_timer = S390_lowcore.softirq_timer;
+ S390_lowcore.steal_timer +=
+ clock - user - guest - system - hardirq - softirq;
+
+ /* Push account value */
+ if (user)
+ account_user_time(tsk, user, scale_vtime(user));
+ if (guest)
+ account_guest_time(tsk, guest, scale_vtime(guest));
+ if (system)
+ account_sys_time(tsk, system, scale_vtime(system));
+ if (hardirq)
+ account_hardirq_time(tsk, hardirq, scale_vtime(hardirq));
+ if (softirq)
+ account_softirq_time(tsk, softirq, scale_vtime(softirq));
steal = S390_lowcore.steal_timer;
if ((s64) steal > 0) {
@@ -145,16 +168,22 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
account_steal_time(steal);
}
- return virt_timer_forward(user + system);
+ return virt_timer_forward(user + guest + system + hardirq + softirq);
}
void vtime_task_switch(struct task_struct *prev)
{
do_account_vtime(prev, 0);
prev->thread.user_timer = S390_lowcore.user_timer;
+ prev->thread.guest_timer = S390_lowcore.guest_timer;
prev->thread.system_timer = S390_lowcore.system_timer;
+ prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
+ prev->thread.softirq_timer = S390_lowcore.softirq_timer;
S390_lowcore.user_timer = current->thread.user_timer;
+ S390_lowcore.guest_timer = current->thread.guest_timer;
S390_lowcore.system_timer = current->thread.system_timer;
+ S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
+ S390_lowcore.softirq_timer = current->thread.softirq_timer;
}
/*
@@ -174,31 +203,22 @@ void vtime_account_user(struct task_struct *tsk)
*/
void vtime_account_irq_enter(struct task_struct *tsk)
{
- u64 timer, system, system_scaled;
+ u64 timer;
timer = S390_lowcore.last_update_timer;
S390_lowcore.last_update_timer = get_vtimer();
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
-
- /* Update MT utilization calculation */
- if (smp_cpu_mtid &&
- time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
- update_mt_scaling();
-
- system = S390_lowcore.system_timer - tsk->thread.system_timer;
- S390_lowcore.steal_timer -= system;
- tsk->thread.system_timer = S390_lowcore.system_timer;
- system_scaled = system;
- /* Do MT utilization scaling */
- if (smp_cpu_mtid) {
- u64 mult = __this_cpu_read(mt_scaling_mult);
- u64 div = __this_cpu_read(mt_scaling_div);
-
- system_scaled = (system_scaled * mult) / div;
- }
- account_system_time(tsk, 0, system, system_scaled);
-
- virt_timer_forward(system);
+ timer -= S390_lowcore.last_update_timer;
+
+ if ((tsk->flags & PF_VCPU) && (irq_count() == 0))
+ S390_lowcore.guest_timer += timer;
+ else if (hardirq_count())
+ S390_lowcore.hardirq_timer += timer;
+ else if (in_serving_softirq())
+ S390_lowcore.softirq_timer += timer;
+ else
+ S390_lowcore.system_timer += timer;
+
+ virt_timer_forward(timer);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44fda64..ec3e900 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,10 +78,15 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
return kstat_cpu(cpu).irqs_sum;
}
-extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
-extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
-extern void account_steal_time(cputime_t);
-extern void account_idle_time(cputime_t);
+void account_user_time(struct task_struct *, cputime_t, cputime_t);
+void account_guest_time(struct task_struct *, cputime_t, cputime_t);
+void account_sys_time(struct task_struct *, cputime_t, cputime_t);
+void account_hardirq_time(struct task_struct *, cputime_t, cputime_t);
+void account_softirq_time(struct task_struct *, cputime_t, cputime_t);
+void account_steal_time(cputime_t);
+void account_idle_time(cputime_t);
+
+void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5ebee31..042f1a3 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -155,8 +155,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
* @cputime: the cpu time spent in virtual machine since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
-static void account_guest_time(struct task_struct *p, cputime_t cputime,
- cputime_t cputime_scaled)
+void account_guest_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
@@ -226,6 +226,24 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
__account_system_time(p, cputime, cputime_scaled, index);
}
+void account_sys_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
+{
+ __account_system_time(p, cputime, cputime_scaled, CPUTIME_SYSTEM);
+}
+
+void account_hardirq_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
+{
+ __account_system_time(p, cputime, cputime_scaled, CPUTIME_IRQ);
+}
+
+void account_softirq_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
+{
+ __account_system_time(p, cputime, cputime_scaled, CPUTIME_SOFTIRQ);
+}
+
/*
* Account for involuntary wait time.
* @cputime: the cpu time spent in involuntary wait
--
2.8.4
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.
On Mon, Nov 21, 2016 at 07:59:56AM +0100, Martin Schwidefsky wrote:
> On Fri, 18 Nov 2016 15:47:02 +0100
> Frederic Weisbecker <[email protected]> wrote:
> > Just because some code isn't too complex doesn't mean we really want to keep it.
> > I get regular questions about what unit cputime_t maps to on a given
> > configuration. Everybody gets confused about that. On many of the
> > patches we got on cputime over the last years, I had to fix quite a few issues
> > with bad granularity assumptions. In fact most fixes that came to kernel/sched/cputime.c
> > recently, after merge or review, were about people getting confused with cputime_t granularity.
>
> These regular questions you get about cputime_t are exactly what I was referring
> to. If the value were just a u64, the guys asking the question about cputime_t
> would just assume the value to be nano-seconds and then go ahead and break things.
Sure, replacing cputime_t with u64 without changing the unit wouldn't help. But changing
it to nsecs and expecting people to deduce it from the u64 type sounds like a good direction.
>
> > Especially for stats that come from nsecs clocks (steal and irqtime), we always have to maintain an
> > accumulator and make sure we don't lose some nanosec deltas.
>
> Yes, for the CONFIG_IRQ_TIME_ACCOUNTING=y case.
Right.
>
> > And we have to maintain several workarounds, sometimes even in the fastpath in
> > order to cope with the cputime_t random granularity all over.
> >
> > Some fastpath examples:
> >
> > * steal time accounting (need to convert nsecs to cputime then back)
> > * irqtime accounting (maintain accumulators)
> > * cputime_adjust, used on any user read of cputime (need to convert from nsecs
> > to cputime on cputime_adjust)
> >
> > But the worst really is the maintenance. This patchset removes around 600 lines.
>
> Well, 300 lines are from the powerpc and s390 cputime.h headers and ~200 from
> the generic cputime_jiffies.h and cputime_nsecs.h.
Well, still worth it :-)
> > > The do_account_vtime function is called once per jiffy and once per task
> > > switch. HZ is usually set to 100 for s390, the conversion once per jiffy
> > > would not be so bad, but the call on the scheduling path *will* hurt.
> >
> > I don't think we need to flush on task switch. If we maintain the accumulators
> > on the task/thread struct instead of per-cpu, then the remaining time after
> > task switch out will be accounted on the next tick after the next task switch in.
>
> You cannot properly calculate steal time if you allow sleeping tasks to sit on
> up to 5*HZ worth of cpu time.
Ah, you mean that when the task goes to sleep, we shouldn't miss more than one
tick's worth of system/user time, but the steal time can be much higher, right?
> I think we *have* to do accounting on task switch.
> At least on s390, likely on powerpc as well. Why not make that an option for
> the architecture with the yet-to-be-written accumulating code?
OK, how about doing the accumulation and always accounting on task switch for now;
we'll see later whether it's worth having such an option.
>
> > > What is even worse is the vtime_account_irq_enter path, which is called several
> > > times for each *interrupt*, at least two times for an interrupt without
> > > additional processing and four times if a softirq is triggered.
> >
> > Actually maintaining an accumulator to flush on ticks is probably going to improve
> > performance because of that. account_system_time() is called twice per interrupt, and
> > such a function does much more than just account the time to the task_struct and cpustat
> > fields. The same applies to userspace boundaries and context switches. The account_*_time()
> > functions can be expensive.
>
> The account_system_time twice per interrupt can be removed with the accumulation
> idea. We will have to see how expensive the account_xxx_time calls are on
> the context switch path.
Right.
>
> > >
> > > Now it has been proposed to implement lazy accounting to accumulate deltas
> > > and do the expensive conversions only infrequently. This is pretty straight-
> > > forward for account_user_time but to do this for the account_system_time
> > > function is more complicated. The function has to differentiate between
> > > guest/hardirq/softirq and pure system time. We would need to keep sums for
> > > each bucket and provide a separate function to add to each bucket. Like
> > > account_guest_time(), account_hardirq_time(), account_softirq_time() and
> > > account_system_time(). Then it is up to the arch code to sort out the details
> > > and call the accounting code once per jiffy for each of the buckets.
> >
> > That wouldn't be too hard really. The s390 code in vtime.c already does that.
>
> Yes, I agree that the accumulating change would not be too hard. Can I make the
> request that we try to get that done first before doing the cleanup?
Of course. I see you started something, I'll be glad to help!
>
> > > We still have to do the whole thing on each task switch though.
> >
> > Not if we maintain the deltas in the task_struct.
> >
> > >
> > > But I am still not happy about the approach. What is the compelling reason
> > > for this change except for the "but it looks ugly"?
> >
> > The diffstat (600 lines removed). Also the fact that we have all these workarounds
> > in the core code just for the special case of 1 arch (s390) and a half
> > (powerpc with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE).
> >
> > I'd much rather have all that complexity moved into a vtime_native.c shared by s390 and powerpc
> > that takes care of proper accumulation in cputime_t and flushes it on ticks in nsecs, rather
> > than having all these cputime_t games all over the kernel.
>
> The goal to have nano-seconds only in the core code is a good one. And with the
> accumulator I think s390 can live with it. The change would have a real upside
> too. There are these stupid divisions for scaled cputime that we have to calculate
> for every call to account_xxx_time(). These would not be done for the interrupts
> anymore.
Exactly!
Thanks.
On Mon, Nov 21, 2016 at 10:49:23AM +0100, Ingo Molnar wrote:
>
> * Frederic Weisbecker <[email protected]> wrote:
>
> > > But I am still not happy about the approach. What is the compelling reason for
> > > this change except for the "but it looks ugly"?
> >
> > The diffstat (600 lines removed). Also the fact that we have all these
> > workarounds in the core code just for the special case of 1 arch (s390) and a
> > half (powerpc with CONFIG_VIRT_CPU_ACCOUNTING_NATIVE).
> >
> > I'd much rather have all that complexity moved into a vtime_native.c shared by
> > s390 and powerpc that takes care of proper accumulation in cputime_t and flushes
> > it on ticks in nsecs, rather than having all these cputime_t games all over the
> > kernel.
>
> I agree - we really want to concentrate complexity in such a fashion and generally
> standardize on nanosecs, and the diffstat of the patchset is really nice.
Yeah, the diffstat might become less impressive after the accumulator code, but it's
still worth it I think.
> The patchset obviously has to build (and work!) on s390/powerpc properly.
Of course!
Thanks.
On Mon, 21 Nov 2016 17:20:06 +0100
Frederic Weisbecker <[email protected]> wrote:
> On Mon, Nov 21, 2016 at 07:59:56AM +0100, Martin Schwidefsky wrote:
> > On Fri, 18 Nov 2016 15:47:02 +0100
> > Frederic Weisbecker <[email protected]> wrote:
> > > > The do_account_vtime function is called once per jiffy and once per task
> > > > switch. HZ is usually set to 100 for s390, the conversion once per jiffy
> > > > would not be so bad, but the call on the scheduling path *will* hurt.
> > >
> > > I don't think we need to flush on task switch. If we maintain the accumulators
> > > on the task/thread struct instead of per-cpu, then the remaining time after
> > > task switch out will be accounted on the next tick after the next task switch in.
> >
> > You cannot properly calculate steal time if you allow sleeping tasks to sit on
> > up to 5*HZ worth of cpu time.
>
> Ah, you mean that when the task goes to sleep, we shouldn't miss more than one
> tick's worth of system/user time, but the steal time can be much higher, right?
No, it is worse than that. Consider a task going to sleep just before a tick
arrives. It will have almost a full tick's worth of cpu time in its task-specific
accounting numbers. After the switch another task with a different set of accounting
numbers is running. The tick will not push the cputime for the work done in the last
HZ period. Depending on what the new task has in its accounting numbers, the steal
time calculation can give you anything. (With HZ=100, for example: a task that sleeps
9ms into a 10ms tick carries 9ms of unflushed cpu time away with it; the tick then
only pushes the new task's 1ms, and the missing 9ms shows up as bogus steal time.)
Repeat the whole thing with any number of tasks and the missing cputime can get
really large. Now get one of these processes back at the beginning of a time slice
and you can get nearly 200% worth of cputime in one tick. Switch to the next task
with missing cputime at the start of the new tick and you can get many ticks with
too much cputime. Not doing accounting on task switch is just broken.
> > I think we *have* to do accounting on task switch.
> > At least on s390, likely on powerpc as well. Why not make that an option for
> > the architecture with the yet-to-be-written accumulating code?
>
> OK, how about doing the accumulation and always accounting on task switch for now;
> we'll see later whether it's worth having such an option.
I am convinced that we need it. The prototype patch does it for s390.
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.
On Mon, Nov 21, 2016 at 11:17:28AM +0100, Martin Schwidefsky wrote:
> On Mon, 21 Nov 2016 07:59:56 +0100
> Martin Schwidefsky <[email protected]> wrote:
>
> > On Fri, 18 Nov 2016 15:47:02 +0100
> > Frederic Weisbecker <[email protected]> wrote:
> >
> > > On Fri, Nov 18, 2016 at 01:08:46PM +0100, Martin Schwidefsky wrote:
> > > > On Thu, 17 Nov 2016 19:08:07 +0100
> > > > Frederic Weisbecker <[email protected]> wrote:
> > > >
> > > > Now it has been proposed to implement lazy accounting to accumulate deltas
> > > > and do the expensive conversions only infrequently. This is pretty straight-
> > > > forward for account_user_time but to do this for the account_system_time
> > > > function is more complicated. The function has to differentiate between
> > > > guest/hardirq/softirq and pure system time. We would need to keep sums for
> > > > each bucket and provide a separate function to add to each bucket. Like
> > > > account_guest_time(), account_hardirq_time(), account_softirq_time() and
> > > > account_system_time(). Then it is up to the arch code to sort out the details
> > > > and call the accounting code once per jiffy for each of the buckets.
> > >
> > > That wouldn't be too hard really. The s390 code in vtime.c already does that.
> >
> > Yes, I agree that the accumulating change would not be too hard. Can I make the
> > request that we try to get that done first before doing the cleanup?
>
> Played with the idea a bit, here is a prototype patch to do the delayed system time
> accounting for s390. It applies against the latest s390 features tree, which you'll
> find here:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
>
> The details probably need some more work, but it works.
>
> --
> From 1b5ef9ddf899da81a48de826f783b15e6fc45d25 Mon Sep 17 00:00:00 2001
> From: Martin Schwidefsky <[email protected]>
> Date: Mon, 21 Nov 2016 10:44:10 +0100
> Subject: [PATCH] s390/cputime: delayed accounting of system time
>
> The account_system_time() function is called with a cputime that
> occurred while running in the kernel. The function detects which
> context the CPU is currently running in and accounts the time to
> the correct bucket. This forces the arch code to account the
> cputime for hardirq and softirq immediately.
>
> Make account_guest_time non-static and add account_sys_time,
> account_hardirq_time and account_softirq_time. With these functions
> the arch code can delay the accounting for system time. For s390
> the accounting is done once per timer tick and for each task switch.
>
> Signed-off-by: Martin Schwidefsky <[email protected]>
Thanks a lot for taking care of that! I'll try to do the same
on powerpc.
A few comments below:
> ---
> arch/s390/include/asm/lowcore.h | 65 ++++++++++++-----------
> arch/s390/include/asm/processor.h | 3 ++
> arch/s390/kernel/vtime.c | 106 ++++++++++++++++++++++----------------
> include/linux/kernel_stat.h | 13 +++--
> kernel/sched/cputime.c | 22 +++++++-
> 5 files changed, 129 insertions(+), 80 deletions(-)
>
> diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
> index 62a5cf1..8a5b082 100644
> --- a/arch/s390/include/asm/lowcore.h
> +++ b/arch/s390/include/asm/lowcore.h
[...]
> @@ -110,34 +119,48 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
> #endif
> : "=m" (S390_lowcore.last_update_timer),
> "=m" (S390_lowcore.last_update_clock));
> - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
> - S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
> + clock = S390_lowcore.last_update_clock - clock;
> + timer -= S390_lowcore.last_update_timer;
> +
> + if ((tsk->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0))
> + S390_lowcore.guest_timer += timer;
> + else if (hardirq_count() - hardirq_offset)
> + S390_lowcore.hardirq_timer += timer;
> + else if (in_serving_softirq())
> + S390_lowcore.softirq_timer += timer;
> + else
> + S390_lowcore.system_timer += timer;
I initially thought that some code could be shared for that whole accumulation. Now I
don't know if it would be a good idea. An example would be dealing with the contexts above
in order to store the accumulated time in the appropriate place.
>
> /* Update MT utilization calculation */
> if (smp_cpu_mtid &&
> time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
> update_mt_scaling();
>
> + /* Calculate cputime delta */
> user = S390_lowcore.user_timer - tsk->thread.user_timer;
> - S390_lowcore.steal_timer -= user;
> tsk->thread.user_timer = S390_lowcore.user_timer;
> -
> + guest = S390_lowcore.guest_timer - tsk->thread.guest_timer;
> + tsk->thread.guest_timer = S390_lowcore.guest_timer;
> system = S390_lowcore.system_timer - tsk->thread.system_timer;
> - S390_lowcore.steal_timer -= system;
> tsk->thread.system_timer = S390_lowcore.system_timer;
> -
> - user_scaled = user;
> - system_scaled = system;
> - /* Do MT utilization scaling */
> - if (smp_cpu_mtid) {
> - u64 mult = __this_cpu_read(mt_scaling_mult);
> - u64 div = __this_cpu_read(mt_scaling_div);
> -
> - user_scaled = (user_scaled * mult) / div;
> - system_scaled = (system_scaled * mult) / div;
> - }
> - account_user_time(tsk, user, user_scaled);
> - account_system_time(tsk, hardirq_offset, system, system_scaled);
> + hardirq = S390_lowcore.hardirq_timer - tsk->thread.hardirq_timer;
> + tsk->thread.hardirq_timer = S390_lowcore.hardirq_timer;
> + softirq = S390_lowcore.softirq_timer - tsk->thread.softirq_timer;
> + tsk->thread.softirq_timer = S390_lowcore.softirq_timer;
> + S390_lowcore.steal_timer +=
> + clock - user - guest - system - hardirq - softirq;
> +
> + /* Push account value */
> + if (user)
> + account_user_time(tsk, user, scale_vtime(user));
> + if (guest)
> + account_guest_time(tsk, guest, scale_vtime(guest));
> + if (system)
> + account_sys_time(tsk, system, scale_vtime(system));
> + if (hardirq)
> + account_hardirq_time(tsk, hardirq, scale_vtime(hardirq));
> + if (softirq)
> + account_softirq_time(tsk, softirq, scale_vtime(softirq));
And doing that would be another part of the shared code.
>
> steal = S390_lowcore.steal_timer;
> if ((s64) steal > 0) {
> @@ -145,16 +168,22 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
> account_steal_time(steal);
> }
>
> - return virt_timer_forward(user + system);
> + return virt_timer_forward(user + guest + system + hardirq + softirq);
> }
>
> void vtime_task_switch(struct task_struct *prev)
> {
> do_account_vtime(prev, 0);
> prev->thread.user_timer = S390_lowcore.user_timer;
> + prev->thread.guest_timer = S390_lowcore.guest_timer;
> prev->thread.system_timer = S390_lowcore.system_timer;
> + prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
> + prev->thread.softirq_timer = S390_lowcore.softirq_timer;
> S390_lowcore.user_timer = current->thread.user_timer;
> + S390_lowcore.guest_timer = current->thread.guest_timer;
> S390_lowcore.system_timer = current->thread.system_timer;
> + S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
> + S390_lowcore.softirq_timer = current->thread.softirq_timer;
> }
Ditto.
>
> /*
> @@ -174,31 +203,22 @@ void vtime_account_user(struct task_struct *tsk)
> */
> void vtime_account_irq_enter(struct task_struct *tsk)
> {
> - u64 timer, system, system_scaled;
> + u64 timer;
>
> timer = S390_lowcore.last_update_timer;
> S390_lowcore.last_update_timer = get_vtimer();
> - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
> -
> - /* Update MT utilization calculation */
> - if (smp_cpu_mtid &&
> - time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
> - update_mt_scaling();
> -
> - system = S390_lowcore.system_timer - tsk->thread.system_timer;
> - S390_lowcore.steal_timer -= system;
> - tsk->thread.system_timer = S390_lowcore.system_timer;
> - system_scaled = system;
> - /* Do MT utilization scaling */
> - if (smp_cpu_mtid) {
> - u64 mult = __this_cpu_read(mt_scaling_mult);
> - u64 div = __this_cpu_read(mt_scaling_div);
> -
> - system_scaled = (system_scaled * mult) / div;
> - }
> - account_system_time(tsk, 0, system, system_scaled);
> -
> - virt_timer_forward(system);
> + timer -= S390_lowcore.last_update_timer;
> +
> + if ((tsk->flags & PF_VCPU) && (irq_count() == 0))
> + S390_lowcore.guest_timer += timer;
> + else if (hardirq_count())
> + S390_lowcore.hardirq_timer += timer;
> + else if (in_serving_softirq())
> + S390_lowcore.softirq_timer += timer;
> + else
> + S390_lowcore.system_timer += timer;
And Ditto.
We could put the accumulation together in a common struct in s390_lowcore,
and its mirror in the thread struct, then have helpers take care of the contexts.
How does that sound to you? Would it help or hurt?
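
Something like this, maybe (completely untested sketch, invented names):

	/* Hypothetical: per-context accumulators, one instance in the
	 * per-cpu area (s390: lowcore) and a mirror in the thread struct. */
	struct vtime_accumulator {
		u64 user;
		u64 guest;
		u64 system;
		u64 hardirq;
		u64 softirq;
	};

	/* On task switch, save the per-cpu state for prev and restore the
	 * state of the incoming task, as your patch does field by field. */
	static inline void vtime_acc_switch(struct vtime_accumulator *percpu,
					    struct task_struct *prev,
					    struct task_struct *next)
	{
		prev->thread.vtime_acc = *percpu;
		*percpu = next->thread.vtime_acc;
	}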
Thanks.
On Tue, 22 Nov 2016 14:45:56 +0100
Frederic Weisbecker <[email protected]> wrote:
> On Mon, Nov 21, 2016 at 11:17:28AM +0100, Martin Schwidefsky wrote:
> > On Mon, 21 Nov 2016 07:59:56 +0100
> > Martin Schwidefsky <[email protected]> wrote:
> [...]
> > @@ -110,34 +119,48 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
> > #endif
> > : "=m" (S390_lowcore.last_update_timer),
> > "=m" (S390_lowcore.last_update_clock));
> > - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
> > - S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
> > + clock = S390_lowcore.last_update_clock - clock;
> > + timer -= S390_lowcore.last_update_timer;
> > +
> > + if ((tsk->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0))
> > + S390_lowcore.guest_timer += timer;
> > + else if (hardirq_count() - hardirq_offset)
> > + S390_lowcore.hardirq_timer += timer;
> > + else if (in_serving_softirq())
> > + S390_lowcore.softirq_timer += timer;
> > + else
> > + S390_lowcore.system_timer += timer;
>
> I initially thought that some code could be shared for that whole accumulation. Now I
> don't know if it would be a good idea. An example would be dealing with the contexts above
> in order to store the accumulated time in the appropriate place.
I thought about a common code inline function that returns the index
(CPUTIME_SYSTEM, CPUTIME_IRQ, ...) for the current context. It did not look
too appealing anymore after I typed it down.
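
For reference, it was something like this (hypothetical, discarded):

	/* Map the current context to a CPUTIME_* index. */
	static inline int vtime_context_index(struct task_struct *tsk, int offset)
	{
		if ((tsk->flags & PF_VCPU) && (irq_count() - offset == 0))
			return CPUTIME_GUEST;
		if (hardirq_count() - offset)
			return CPUTIME_IRQ;
		if (in_serving_softirq())
			return CPUTIME_SOFTIRQ;
		return CPUTIME_SYSTEM;
	}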
> > - account_user_time(tsk, user, user_scaled);
> > - account_system_time(tsk, hardirq_offset, system, system_scaled);
> > + hardirq = S390_lowcore.hardirq_timer - tsk->thread.hardirq_timer;
> > + tsk->thread.hardirq_timer = S390_lowcore.hardirq_timer;
> > + softirq = S390_lowcore.softirq_timer - tsk->thread.softirq_timer;
> > + tsk->thread.softirq_timer = S390_lowcore.softirq_timer;
> > + S390_lowcore.steal_timer +=
> > + clock - user - guest - system - hardirq - softirq;
> > +
> > + /* Push account value */
> > + if (user)
> > + account_user_time(tsk, user, scale_vtime(user));
> > + if (guest)
> > + account_guest_time(tsk, guest, scale_vtime(guest));
> > + if (system)
> > + account_sys_time(tsk, system, scale_vtime(system));
> > + if (hardirq)
> > + account_hardirq_time(tsk, hardirq, scale_vtime(hardirq));
> > + if (softirq)
> > + account_softirq_time(tsk, softirq, scale_vtime(softirq));
>
> And doing that would be another part of the shared code.
Right now I would feel more comfortable if that stays architecture code.
The calculation up to the point where the account_xxx_time functions can be
called is definitely arch specific. Why try to do the accumulation in
common code? I have the feeling that would just complicate the code for
no good reason.
> >
> > steal = S390_lowcore.steal_timer;
> > if ((s64) steal > 0) {
> > @@ -145,16 +168,22 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
> > account_steal_time(steal);
> > }
> >
> > - return virt_timer_forward(user + system);
> > + return virt_timer_forward(user + guest + system + hardirq + softirq);
> > }
> >
> > void vtime_task_switch(struct task_struct *prev)
> > {
> > do_account_vtime(prev, 0);
> > prev->thread.user_timer = S390_lowcore.user_timer;
> > + prev->thread.guest_timer = S390_lowcore.guest_timer;
> > prev->thread.system_timer = S390_lowcore.system_timer;
> > + prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
> > + prev->thread.softirq_timer = S390_lowcore.softirq_timer;
> > S390_lowcore.user_timer = current->thread.user_timer;
> > + S390_lowcore.guest_timer = current->thread.guest_timer;
> > S390_lowcore.system_timer = current->thread.system_timer;
> > + S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
> > + S390_lowcore.softirq_timer = current->thread.softirq_timer;
> > }
>
> Ditto.
Same here. The lowcore fields are too arch specific.
> >
> > /*
> > @@ -174,31 +203,22 @@ void vtime_account_user(struct task_struct *tsk)
> > */
> > void vtime_account_irq_enter(struct task_struct *tsk)
> > {
> > - u64 timer, system, system_scaled;
> > + u64 timer;
> >
> > timer = S390_lowcore.last_update_timer;
> > S390_lowcore.last_update_timer = get_vtimer();
> > - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
> > -
> > - /* Update MT utilization calculation */
> > - if (smp_cpu_mtid &&
> > - time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
> > - update_mt_scaling();
> > -
> > - system = S390_lowcore.system_timer - tsk->thread.system_timer;
> > - S390_lowcore.steal_timer -= system;
> > - tsk->thread.system_timer = S390_lowcore.system_timer;
> > - system_scaled = system;
> > - /* Do MT utilization scaling */
> > - if (smp_cpu_mtid) {
> > - u64 mult = __this_cpu_read(mt_scaling_mult);
> > - u64 div = __this_cpu_read(mt_scaling_div);
> > -
> > - system_scaled = (system_scaled * mult) / div;
> > - }
> > - account_system_time(tsk, 0, system, system_scaled);
> > -
> > - virt_timer_forward(system);
> > + timer -= S390_lowcore.last_update_timer;
> > +
> > + if ((tsk->flags & PF_VCPU) && (irq_count() == 0))
> > + S390_lowcore.guest_timer += timer;
> > + else if (hardirq_count())
> > + S390_lowcore.hardirq_timer += timer;
> > + else if (in_serving_softirq())
> > + S390_lowcore.softirq_timer += timer;
> > + else
> > + S390_lowcore.system_timer += timer;
>
> And Ditto.
It would be nice if we could find a way to move the decision tree
for where to put the cputime delta into common code.
> We could put the accumulation together in a common struct in s390_lowcore,
> and its mirror in the thread struct, then have helpers take care of the contexts.
>
> How does that sound to you? Would it help or hurt?
My gut feeling is that trying to make the accumulation code common will
hurt more than it helps. But we can certainly try and look at the result.
I spent some more time on this, here is my current patch. For my part
the patch is close to the final solution if we can agree on it.
--
From a8f5d41df5f32897335567ea9f5a61a716855d5d Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <[email protected]>
Date: Mon, 21 Nov 2016 10:44:10 +0100
Subject: [PATCH] s390/cputime: delayed accounting of system time
The account_system_time() function is called with a cputime that
occurred while running in the kernel. The function detects the
current context of the CPU (system, guest, irq, or softirq) and
accounts the time to the correct bucket. This forces the arch code
to account the cputime for hardirq and softirq before entering
and after leaving the context in question.
Make account_guest_time non-static and add account_system_time_native.
With these two functions the arch code can delay the accounting of
system time. For s390 the accounting is done once per timer tick and
for each task switch.
Signed-off-by: Martin Schwidefsky <[email protected]>
---
arch/s390/include/asm/lowcore.h | 65 ++++++++++----------
arch/s390/include/asm/processor.h | 3 +
arch/s390/kernel/vtime.c | 126 +++++++++++++++++++++++---------------
include/linux/kernel_stat.h | 4 ++
kernel/sched/cputime.c | 12 +++-
5 files changed, 127 insertions(+), 83 deletions(-)
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 62a5cf1..8a5b082 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -85,53 +85,56 @@ struct lowcore {
__u64 mcck_enter_timer; /* 0x02c0 */
__u64 exit_timer; /* 0x02c8 */
__u64 user_timer; /* 0x02d0 */
- __u64 system_timer; /* 0x02d8 */
- __u64 steal_timer; /* 0x02e0 */
- __u64 last_update_timer; /* 0x02e8 */
- __u64 last_update_clock; /* 0x02f0 */
- __u64 int_clock; /* 0x02f8 */
- __u64 mcck_clock; /* 0x0300 */
- __u64 clock_comparator; /* 0x0308 */
+ __u64 guest_timer; /* 0x02d8 */
+ __u64 system_timer; /* 0x02e0 */
+ __u64 hardirq_timer; /* 0x02e8 */
+ __u64 softirq_timer; /* 0x02f0 */
+ __u64 steal_timer; /* 0x02f8 */
+ __u64 last_update_timer; /* 0x0300 */
+ __u64 last_update_clock; /* 0x0308 */
+ __u64 int_clock; /* 0x0310 */
+ __u64 mcck_clock; /* 0x0318 */
+ __u64 clock_comparator; /* 0x0320 */
/* Current process. */
- __u64 current_task; /* 0x0310 */
- __u8 pad_0x318[0x320-0x318]; /* 0x0318 */
- __u64 kernel_stack; /* 0x0320 */
+ __u64 current_task; /* 0x0328 */
+ __u8 pad_0x318[0x320-0x318]; /* 0x0330 */
+ __u64 kernel_stack; /* 0x0338 */
/* Interrupt, panic and restart stack. */
- __u64 async_stack; /* 0x0328 */
- __u64 panic_stack; /* 0x0330 */
- __u64 restart_stack; /* 0x0338 */
+ __u64 async_stack; /* 0x0340 */
+ __u64 panic_stack; /* 0x0348 */
+ __u64 restart_stack; /* 0x0350 */
/* Restart function and parameter. */
- __u64 restart_fn; /* 0x0340 */
- __u64 restart_data; /* 0x0348 */
- __u64 restart_source; /* 0x0350 */
+ __u64 restart_fn; /* 0x0358 */
+ __u64 restart_data; /* 0x0360 */
+ __u64 restart_source; /* 0x0368 */
/* Address space pointer. */
- __u64 kernel_asce; /* 0x0358 */
- __u64 user_asce; /* 0x0360 */
+ __u64 kernel_asce; /* 0x0370 */
+ __u64 user_asce; /* 0x0378 */
/*
* The lpp and current_pid fields form a
* 64-bit value that is set as program
* parameter with the LPP instruction.
*/
- __u32 lpp; /* 0x0368 */
- __u32 current_pid; /* 0x036c */
+ __u32 lpp; /* 0x0380 */
+ __u32 current_pid; /* 0x0384 */
/* SMP info area */
- __u32 cpu_nr; /* 0x0370 */
- __u32 softirq_pending; /* 0x0374 */
- __u64 percpu_offset; /* 0x0378 */
- __u64 vdso_per_cpu_data; /* 0x0380 */
- __u64 machine_flags; /* 0x0388 */
- __u32 preempt_count; /* 0x0390 */
- __u8 pad_0x0394[0x0398-0x0394]; /* 0x0394 */
- __u64 gmap; /* 0x0398 */
- __u32 spinlock_lockval; /* 0x03a0 */
- __u32 fpu_flags; /* 0x03a4 */
- __u8 pad_0x03a8[0x0400-0x03a8]; /* 0x03a8 */
+ __u32 cpu_nr; /* 0x0388 */
+ __u32 softirq_pending; /* 0x038c */
+ __u64 percpu_offset; /* 0x0390 */
+ __u64 vdso_per_cpu_data; /* 0x0398 */
+ __u64 machine_flags; /* 0x03a0 */
+ __u32 preempt_count; /* 0x03a8 */
+ __u8 pad_0x03ac[0x03b0-0x03ac]; /* 0x03ac */
+ __u64 gmap; /* 0x03b0 */
+ __u32 spinlock_lockval; /* 0x03b8 */
+ __u32 fpu_flags; /* 0x03bc */
+ __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */
/* Per cpu primary space access list */
__u32 paste[16]; /* 0x0400 */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index bf8b2e2..0234eea 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -111,7 +111,10 @@ struct thread_struct {
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
unsigned long user_timer; /* task cputime in user space */
+ unsigned long guest_timer; /* task cputime in kvm guest */
unsigned long system_timer; /* task cputime in kernel space */
+ unsigned long hardirq_timer; /* task cputime in hardirq context */
+ unsigned long softirq_timer; /* task cputime in softirq context */
unsigned long sys_call_table; /* system call table address */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
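The thread_struct fields above are per-task shadows of the lowcore
timers: the lowcore accumulates raw CPU timer values per cpu, the
task-local copies let do_account_vtime() compute per-task deltas, and
vtime_task_switch() (in the vtime.c hunk below) hands the accumulators
over at context switch. A toy model of that hand-off, with illustrative
names only, showing one field:

	#include <stdint.h>

	struct toy_thread { uint64_t user_timer; };

	static uint64_t lowcore_user_timer;	/* per-cpu accumulator */

	static void toy_task_switch(struct toy_thread *prev,
				    struct toy_thread *next)
	{
		/* Park the accumulator in prev, resume from next's
		 * snapshot, mirroring vtime_task_switch() below. */
		prev->user_timer = lowcore_user_timer;
		lowcore_user_timer = next->user_timer;
	}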
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 9a6c957..b6de91e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -90,14 +90,30 @@ static void update_mt_scaling(void)
__this_cpu_write(mt_scaling_jiffies, jiffies_64);
}
+static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new)
+{
+ u64 delta;
+
+ delta = new - *tsk_vtime;
+ *tsk_vtime = new;
+ return delta;
+}
+
+static inline u64 scale_vtime(u64 vtime)
+{
+ u64 mult = __this_cpu_read(mt_scaling_mult);
+ u64 div = __this_cpu_read(mt_scaling_div);
+
+ return smp_cpu_mtid ? (vtime * mult / div) : vtime;
+}
+
/*
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
+static int do_account_vtime(struct task_struct *tsk)
{
- u64 timer, clock, user, system, steal;
- u64 user_scaled, system_scaled;
+ u64 timer, clock, user, guest, system, hardirq, softirq, steal;
timer = S390_lowcore.last_update_timer;
clock = S390_lowcore.last_update_clock;
@@ -110,34 +126,47 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
#endif
: "=m" (S390_lowcore.last_update_timer),
"=m" (S390_lowcore.last_update_clock));
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
- S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
+ clock = S390_lowcore.last_update_clock - clock;
+ timer -= S390_lowcore.last_update_timer;
+
+ if (hardirq_count())
+ S390_lowcore.hardirq_timer += timer;
+ else
+ S390_lowcore.system_timer += timer;
/* Update MT utilization calculation */
if (smp_cpu_mtid &&
time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
update_mt_scaling();
- user = S390_lowcore.user_timer - tsk->thread.user_timer;
- S390_lowcore.steal_timer -= user;
- tsk->thread.user_timer = S390_lowcore.user_timer;
-
- system = S390_lowcore.system_timer - tsk->thread.system_timer;
- S390_lowcore.steal_timer -= system;
- tsk->thread.system_timer = S390_lowcore.system_timer;
-
- user_scaled = user;
- system_scaled = system;
- /* Do MT utilization scaling */
- if (smp_cpu_mtid) {
- u64 mult = __this_cpu_read(mt_scaling_mult);
- u64 div = __this_cpu_read(mt_scaling_div);
-
- user_scaled = (user_scaled * mult) / div;
- system_scaled = (system_scaled * mult) / div;
- }
- account_user_time(tsk, user, user_scaled);
- account_system_time(tsk, hardirq_offset, system, system_scaled);
+ /* Calculate cputime delta */
+ user = update_tsk_timer(&tsk->thread.user_timer,
+ READ_ONCE(S390_lowcore.user_timer));
+ guest = update_tsk_timer(&tsk->thread.guest_timer,
+ READ_ONCE(S390_lowcore.guest_timer));
+ system = update_tsk_timer(&tsk->thread.system_timer,
+ READ_ONCE(S390_lowcore.system_timer));
+ hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
+ READ_ONCE(S390_lowcore.hardirq_timer));
+ softirq = update_tsk_timer(&tsk->thread.softirq_timer,
+ READ_ONCE(S390_lowcore.softirq_timer));
+ S390_lowcore.steal_timer +=
+ clock - user - guest - system - hardirq - softirq;
+
+ /* Push accounting values */
+ if (user)
+ account_user_time(tsk, user, scale_vtime(user));
+ if (guest)
+ account_guest_time(tsk, guest, scale_vtime(guest));
+ if (system)
+ account_system_time_native(tsk, system, scale_vtime(system),
+ CPUTIME_SYSTEM);
+ if (hardirq)
+ account_system_time_native(tsk, hardirq, scale_vtime(hardirq),
+ CPUTIME_IRQ);
+ if (softirq)
+ account_system_time_native(tsk, softirq, scale_vtime(softirq),
+ CPUTIME_SOFTIRQ);
steal = S390_lowcore.steal_timer;
if ((s64) steal > 0) {
@@ -145,16 +174,22 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
account_steal_time(steal);
}
- return virt_timer_forward(user + system);
+ return virt_timer_forward(timer);
}
void vtime_task_switch(struct task_struct *prev)
{
- do_account_vtime(prev, 0);
+ do_account_vtime(prev);
prev->thread.user_timer = S390_lowcore.user_timer;
+ prev->thread.guest_timer = S390_lowcore.guest_timer;
prev->thread.system_timer = S390_lowcore.system_timer;
+ prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
+ prev->thread.softirq_timer = S390_lowcore.softirq_timer;
S390_lowcore.user_timer = current->thread.user_timer;
+ S390_lowcore.guest_timer = current->thread.guest_timer;
S390_lowcore.system_timer = current->thread.system_timer;
+ S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
+ S390_lowcore.softirq_timer = current->thread.softirq_timer;
}
/*
@@ -164,7 +199,7 @@ void vtime_task_switch(struct task_struct *prev)
*/
void vtime_account_user(struct task_struct *tsk)
{
- if (do_account_vtime(tsk, HARDIRQ_OFFSET))
+ if (do_account_vtime(tsk))
virt_timer_expire();
}
@@ -174,31 +209,22 @@ void vtime_account_user(struct task_struct *tsk)
*/
void vtime_account_irq_enter(struct task_struct *tsk)
{
- u64 timer, system, system_scaled;
+ u64 timer;
timer = S390_lowcore.last_update_timer;
S390_lowcore.last_update_timer = get_vtimer();
- S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
-
- /* Update MT utilization calculation */
- if (smp_cpu_mtid &&
- time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
- update_mt_scaling();
-
- system = S390_lowcore.system_timer - tsk->thread.system_timer;
- S390_lowcore.steal_timer -= system;
- tsk->thread.system_timer = S390_lowcore.system_timer;
- system_scaled = system;
- /* Do MT utilization scaling */
- if (smp_cpu_mtid) {
- u64 mult = __this_cpu_read(mt_scaling_mult);
- u64 div = __this_cpu_read(mt_scaling_div);
-
- system_scaled = (system_scaled * mult) / div;
- }
- account_system_time(tsk, 0, system, system_scaled);
-
- virt_timer_forward(system);
+ timer -= S390_lowcore.last_update_timer;
+
+ if ((tsk->flags & PF_VCPU) && (irq_count() == 0))
+ S390_lowcore.guest_timer += timer;
+ else if (hardirq_count())
+ S390_lowcore.hardirq_timer += timer;
+ else if (in_serving_softirq())
+ S390_lowcore.softirq_timer += timer;
+ else
+ S390_lowcore.system_timer += timer;
+
+ virt_timer_forward(timer);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
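The net effect of the vtime.c changes: each update takes a single
elapsed CPU timer delta (the s390 CPU timer counts down, hence the old
minus new subtraction) and bins it by context: guest time when PF_VCPU
is set outside interrupt context, then hardirq, then softirq, else
system. At flush time the steal delta is whatever wall-clock time the
summed per-context CPU timer deltas fail to cover. A worked example of
that arithmetic with made-up numbers (standalone userspace C, not
kernel code):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* Made-up deltas since the last flush, same time unit
		 * for wall clock and CPU timer. */
		uint64_t clock = 100;		/* elapsed wall clock */
		uint64_t user = 40, guest = 0, system = 30;
		uint64_t hardirq = 15, softirq = 5;

		/* Wall-clock time not covered by accounted CPU time
		 * was stolen by the hypervisor. */
		uint64_t steal = clock - user - guest - system
			       - hardirq - softirq;

		printf("steal delta: %llu\n", (unsigned long long)steal);
		return 0;	/* prints: steal delta: 10 */
	}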
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44fda64..a7e7951 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -80,10 +80,14 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
+extern void account_guest_time(struct task_struct *, cputime_t, cputime_t);
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+extern void account_system_time_native(struct task_struct *, cputime_t,
+ cputime_t, int);
+
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5ebee31..9e6c5aa 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -155,8 +155,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
* @cputime: the cpu time spent in virtual machine since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
-static void account_guest_time(struct task_struct *p, cputime_t cputime,
- cputime_t cputime_scaled)
+void account_guest_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
@@ -199,6 +199,14 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
acct_account_cputime(p);
}
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+void account_system_time_native(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled, int index)
+{
+ __account_system_time(p, cputime, cputime_scaled, index);
+}
+#endif
+
/*
* Account system cpu time to a process.
* @p: the process that the cpu time gets accounted to
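Core-side summary: account_guest_time() merely loses its static so the
arch code can call it directly, and account_system_time_native() gives
CONFIG_VIRT_CPU_ACCOUNTING_NATIVE archs a way to choose the kcpustat
index themselves instead of having account_system_time() derive it from
its hardirq_offset argument. The context-to-index mapping the s390
caller implements boils down to the following sketch (the helper is
illustrative, but the CPUTIME_* values are the real enum cpu_usage_stat
entries from <linux/kernel_stat.h>):

	#include <linux/types.h>
	#include <linux/kernel_stat.h>

	static int context_to_cpustat_index(bool hardirq, bool softirq)
	{
		if (hardirq)
			return CPUTIME_IRQ;
		if (softirq)
			return CPUTIME_SOFTIRQ;
		return CPUTIME_SYSTEM;
	}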
--
2.8.4
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.