Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id ; Fri, 1 Mar 2002 10:49:34 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id ; Fri, 1 Mar 2002 10:49:16 -0500 Received: from gateway-1237.mvista.com ([12.44.186.158]:28664 "EHLO hermes.mvista.com") by vger.kernel.org with ESMTP id ; Fri, 1 Mar 2002 10:49:07 -0500 Message-ID: <3C7FA2CC.F5D69F42@mvista.com> Date: Fri, 01 Mar 2002 07:48:28 -0800 From: george anzinger Organization: Monta Vista Software X-Mailer: Mozilla 4.77 [en] (X11; U; Linux 2.2.12-20b i686) X-Accept-Language: en MIME-Version: 1.0 To: Tim Schmielau CC: linux-kernel@vger.kernel.org Subject: Re: [patch] enable uptime display > 497 days on 32 bit (1/2) In-Reply-To: Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Tim Schmielau wrote: > > rediffed to 2.4.19-pre2 and three micro-optimizations: > > move jiffies_hi etc. to same cacheline as jiffies > (suggested by George Anzinger) > avoid turning off interrupts (suggested by Andreas Dilger) > use unlikely() (suggested by Andreas Dilger) > > As no other comments turned up, this will go to Marcelo RSN. > (wondered why noone vetoed this as overkill...) I figure that is Marcelo's job :) > > Tim > > --- linux-2.4.19-pre2/include/linux/sched.h Thu Feb 28 23:52:25 2002 > +++ linux-2.4.19-pre2-j64/include/linux/sched.h Fri Mar 1 02:39:09 2002 > @@ -361,7 +361,7 @@ > unsigned long it_real_incr, it_prof_incr, it_virt_incr; > struct timer_list real_timer; > struct tms times; > - unsigned long start_time; > + u64 start_time; > long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS]; > /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ > unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap; > @@ -573,6 +573,18 @@ > #include > > extern unsigned long volatile jiffies; > +#if BITS_PER_LONG < 48 > +# define NEEDS_JIFFIES64 > + extern u64 get_jiffies64(void); > +#else > + /* jiffies is wide enough to not wrap for 8716 years at HZ==1024 */ > + static inline u64 get_jiffies64(void) > + { > + return (u64)jiffies; > + } > +#endif > + > + > extern unsigned long itimer_ticks; > extern unsigned long itimer_next; > extern struct timeval xtime; > > --- linux-2.4.19-pre2/kernel/timer.c Mon Oct 8 19:41:41 2001 > +++ linux-2.4.19-pre2-j64/kernel/timer.c Fri Mar 1 00:45:04 2002 > @@ -66,6 +66,10 @@ > extern int do_setitimer(int, struct itimerval *, struct itimerval *); > > unsigned long volatile jiffies; > +#ifdef NEEDS_JIFFIES64 > + static unsigned long jiffies_hi, jiffies_last; > + static spinlock_t jiffies64_lock = SPIN_LOCK_UNLOCKED; > +#endif > > unsigned int * prof_buffer; > unsigned long prof_len; > @@ -103,6 +107,8 @@ > > #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) > > +static inline void init_jiffieswrap_timer(void); > + > void init_timervecs (void) > { > int i; > @@ -115,6 +121,8 @@ > } > for (i = 0; i < TVR_SIZE; i++) > INIT_LIST_HEAD(tv1.vec + i); > + > + init_jiffieswrap_timer(); > } > > static unsigned long timer_jiffies; > @@ -683,6 +691,61 @@ > if (TQ_ACTIVE(tq_timer)) > mark_bh(TQUEUE_BH); > } > + > + > +#ifdef NEEDS_JIFFIES64 > + > +u64 get_jiffies64(void) > +{ > + unsigned long jiffies_tmp, jiffies_hi_tmp; > + > + spin_lock(&jiffies64_lock); > + jiffies_tmp = jiffies; /* avoid races */ > + jiffies_hi_tmp = jiffies_hi; > + if (unlikely(jiffies_tmp < jiffies_last)) /* We have a wrap */ > + jiffies_hi++; > + jiffies_last = jiffies_tmp; > + spin_unlock(&jiffies64_lock); > + > + return (jiffies_tmp | ((u64)jiffies_hi_tmp) << BITS_PER_LONG); > +} > + > +/* use a timer to periodically check for jiffies overflow */ > + > +static struct timer_list jiffieswrap_timer; > +#define CHECK_JIFFIESWRAP_INTERVAL (1ul << (BITS_PER_LONG-2)) > + > +static void check_jiffieswrap(unsigned long data) > +{ > + unsigned long jiffies_tmp; > + mod_timer(&jiffieswrap_timer, jiffies + CHECK_JIFFIESWRAP_INTERVAL); > + > + if (spin_trylock(&jiffies64_lock)) { > + /* If we don't get the lock, we can just give up. > + The current holder of the lock will check for wraps */ > + jiffies_tmp = jiffies; /* avoid races */ > + if (jiffies_tmp < jiffies_last) /* We have a wrap */ > + jiffies_hi++; > + jiffies_last = jiffies_tmp; > + spin_unlock(&jiffies64_lock); > + } } > + > +static inline void init_jiffieswrap_timer(void) > +{ > + init_timer(&jiffieswrap_timer); > + jiffieswrap_timer.expires = jiffies + CHECK_JIFFIESWRAP_INTERVAL; > + jiffieswrap_timer.function = check_jiffieswrap; > + add_timer(&jiffieswrap_timer); > +} > + > +#else > + > +static inline void init_jiffieswrap_timer(void) > +{ > +} > + > +#endif /* NEEDS_JIFFIES64 */ > + > > #if !defined(__alpha__) && !defined(__ia64__) > > > --- linux-2.4.19-pre2/kernel/fork.c Sun Feb 24 19:20:43 2002 > +++ linux-2.4.19-pre2-j64/kernel/fork.c Fri Mar 1 00:05:24 2002 > @@ -657,7 +657,7 @@ > } > #endif > p->lock_depth = -1; /* -1 = no lock */ > - p->start_time = jiffies; > + p->start_time = get_jiffies64(); > > INIT_LIST_HEAD(&p->local_pages); > > > --- linux-2.4.19-pre2/kernel/info.c Sat Apr 21 01:15:40 2001 > +++ linux-2.4.19-pre2-j64/kernel/info.c Fri Mar 1 00:05:24 2002 > @@ -12,15 +12,19 @@ > #include > > #include > +#include > > asmlinkage long sys_sysinfo(struct sysinfo *info) > { > struct sysinfo val; > + u64 uptime; > > memset((char *)&val, 0, sizeof(struct sysinfo)); > > cli(); > - val.uptime = jiffies / HZ; > + uptime = get_jiffies64(); > + do_div(uptime, HZ); > + val.uptime = (unsigned long) uptime; > > val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); > val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); > > --- linux-2.4.19-pre2/fs/proc/array.c Thu Oct 11 18:00:01 2001 > +++ linux-2.4.19-pre2-j64/fs/proc/array.c Fri Mar 1 00:05:24 2002 > @@ -343,7 +343,7 @@ > ppid = task->pid ? task->p_opptr->pid : 0; > read_unlock(&tasklist_lock); > res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ > -%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \ > +%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %llu %lu %ld %lu %lu %lu %lu %lu \ > %lu %lu %lu %lu %lu %lu %lu %lu %d %d\n", > task->pid, > task->comm, > @@ -366,7 +366,7 @@ > nice, > 0UL /* removed */, > task->it_real_value, > - task->start_time, > + (unsigned long long)(task->start_time), > vsize, > mm ? mm->rss : 0, /* you might want to shift this left 3 */ > task->rlim[RLIMIT_RSS].rlim_cur, > > --- linux-2.4.19-pre2/fs/proc/proc_misc.c Wed Nov 21 06:29:09 2001 > +++ linux-2.4.19-pre2-j64/fs/proc/proc_misc.c Fri Mar 1 01:32:28 2002 > @@ -40,6 +40,7 @@ > #include > #include > #include > +#include > > > #define LOAD_INT(x) ((x) >> FSHIFT) > @@ -93,37 +94,93 @@ > return proc_calc_metrics(page, start, off, count, eof, len); > } > > +#if BITS_PER_LONG < 48 > +static unsigned long idle_hi, idle_last; > +static spinlock_t idle64_lock = SPIN_LOCK_UNLOCKED; > + > +u64 get_idle64(void) > +{ > + unsigned long idle, idle_hi_tmp; > + > + spin_lock(&idle64_lock); > + idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime; > + if (unlikely(idle < idle_last)) /* We have a wrap */ > + idle_hi++; > + idle_last = idle; > + idle_hi_tmp = idle_hi; > + spin_unlock(&idle64_lock); > + > + return (idle | ((u64)idle_hi_tmp) << BITS_PER_LONG); > +} > + > +/* use a timer to periodically check for idle time overflow */ > + > +static struct timer_list idlewrap_timer; > +#define CHECK_IDLEWRAP_INTERVAL (1ul << (BITS_PER_LONG-2)) > + > +static void check_idlewrap(unsigned long data) > +{ > + unsigned long idle; > + > + mod_timer(&idlewrap_timer, jiffies + CHECK_IDLEWRAP_INTERVAL); > + if (spin_trylock(&idle64_lock)) { > + /* If we don't get the lock, we can just give up. > + The current holder of the lock will check for wraps */ > + idle = init_tasks[0]->times.tms_utime > + + init_tasks[0]->times.tms_stime; > + if (idle < idle_last) /* We have a wrap */ > + idle_hi++; > + idle_last = idle; > + spin_unlock(&idle64_lock); > + } } > + > +static inline void init_idlewrap_timer(void) > +{ > + init_timer(&idlewrap_timer); > + idlewrap_timer.expires = jiffies + CHECK_IDLEWRAP_INTERVAL; > + idlewrap_timer.function = check_idlewrap; > + add_timer(&idlewrap_timer); > +} > + > +#else > + /* Idle time won't overflow for 8716 years at HZ==1024 */ > + > +static inline u64 get_idle64(void) > +{ > + return (u64)(init_tasks[0]->times.tms_utime > + + init_tasks[0]->times.tms_stime); > +} > + > +static inline void init_idlewrap_timer(void) > +{ > +} > + > +#endif /* BITS_PER_LONG < 48 */ > + > static int uptime_read_proc(char *page, char **start, off_t off, > int count, int *eof, void *data) > { > - unsigned long uptime; > - unsigned long idle; > + u64 uptime, idle; > + unsigned long uptime_remainder, idle_remainder; > int len; > > - uptime = jiffies; > - idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime; > + uptime = get_jiffies64(); > + uptime_remainder = (unsigned long) do_div(uptime, HZ); > + idle = get_idle64(); > + idle_remainder = (unsigned long) do_div(idle, HZ); > > - /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but > - that would overflow about every five days at HZ == 100. > - Therefore the identity a = (a / b) * b + a % b is used so that it is > - calculated as (((t / HZ) * 100) + ((t % HZ) * 100) / HZ) % 100. > - The part in front of the '+' always evaluates as 0 (mod 100). All divisions > - in the above formulas are truncating. For HZ being a power of 10, the > - calculations simplify to the version in the #else part (if the printf > - format is adapted to the same number of digits as zeroes in HZ. > - */ > #if HZ!=100 > len = sprintf(page,"%lu.%02lu %lu.%02lu\n", > - uptime / HZ, > - (((uptime % HZ) * 100) / HZ) % 100, > - idle / HZ, > - (((idle % HZ) * 100) / HZ) % 100); > + (unsigned long) uptime, > + (uptime_remainder * 100) / HZ, > + (unsigned long) idle, > + (idle_remainder * 100) / HZ); > #else > len = sprintf(page,"%lu.%02lu %lu.%02lu\n", > - uptime / HZ, > - uptime % HZ, > - idle / HZ, > - idle % HZ); > + (unsigned long) uptime, > + uptime_remainder, > + (unsigned long) idle, > + idle_remainder); > #endif > return proc_calc_metrics(page, start, off, count, eof, len); > } > @@ -240,7 +297,7 @@ > { > int i, len; > extern unsigned long total_forks; > - unsigned long jif = jiffies; > + u64 jif = get_jiffies64(); > unsigned int sum = 0, user = 0, nice = 0, system = 0; > int major, disk; > > @@ -256,17 +313,19 @@ > #endif > } > > - len = sprintf(page, "cpu %u %u %u %lu\n", user, nice, system, > - jif * smp_num_cpus - (user + nice + system)); > + len = sprintf(page, "cpu %u %u %u %llu\n", user, nice, system, > + (unsigned long long) jif * smp_num_cpus > + - user - nice - system); > for (i = 0 ; i < smp_num_cpus; i++) > - len += sprintf(page + len, "cpu%d %u %u %u %lu\n", > + len += sprintf(page + len, "cpu%d %u %u %u %llu\n", > i, > kstat.per_cpu_user[cpu_logical_map(i)], > kstat.per_cpu_nice[cpu_logical_map(i)], > kstat.per_cpu_system[cpu_logical_map(i)], > - jif - ( kstat.per_cpu_user[cpu_logical_map(i)] \ > - + kstat.per_cpu_nice[cpu_logical_map(i)] \ > - + kstat.per_cpu_system[cpu_logical_map(i)])); > + (unsigned long long) jif > + - kstat.per_cpu_user[cpu_logical_map(i)] > + - kstat.per_cpu_nice[cpu_logical_map(i)] > + - kstat.per_cpu_system[cpu_logical_map(i)]); > len += sprintf(page + len, > "page %u %u\n" > "swap %u %u\n" > @@ -302,12 +361,13 @@ > } > } > > + do_div(jif, HZ); > len += sprintf(page + len, > "\nctxt %u\n" > "btime %lu\n" > "processes %lu\n", > kstat.context_swtch, > - xtime.tv_sec - jif / HZ, > + xtime.tv_sec - (unsigned long) jif, > total_forks); > > return proc_calc_metrics(page, start, off, count, eof, len); > @@ -565,4 +625,6 @@ > slabinfo_read_proc, NULL); > if (entry) > entry->write_proc = slabinfo_write_proc; > + > + init_idlewrap_timer(); > } > > --- linux-2.4.19-pre2/mm/oom_kill.c Sun Nov 4 02:05:25 2001 > +++ linux-2.4.19-pre2-j64/mm/oom_kill.c Fri Mar 1 00:05:24 2002 > @@ -69,11 +69,10 @@ > /* > * CPU time is in seconds and run time is in minutes. There is no > * particular reason for this other than that it turned out to work > - * very well in practice. This is not safe against jiffie wraps > - * but we don't care _that_ much... > + * very well in practice. > */ > cpu_time = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3); > - run_time = (jiffies - p->start_time) >> (SHIFT_HZ + 10); > + run_time = (get_jiffies64() - p->start_time) >> (SHIFT_HZ + 10); > > points /= int_sqrt(cpu_time); > points /= int_sqrt(int_sqrt(run_time)); > > --- linux-2.4.19-pre2/kernel/acct.c Thu Feb 28 23:52:26 2002 > +++ linux-2.4.19-pre2-j64/kernel/acct.c Fri Mar 1 00:06:51 2002 > @@ -56,6 +56,7 @@ > #include > > #include > +#include > > /* > * These constants control the amount of freespace that suspend and > @@ -227,20 +228,24 @@ > * This routine has been adopted from the encode_comp_t() function in > * the kern_acct.c file of the FreeBSD operating system. The encoding > * is a 13-bit fraction with a 3-bit (base 8) exponent. > + * > + * Bumped up to encode 64 bit values. Unfortunately the result may > + * overflow now. > */ > > #define MANTSIZE 13 /* 13 bit mantissa. */ > -#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */ > +#define EXPSIZE 3 /* 3 bit exponent. */ > +#define EXPBASE 3 /* Base 8 (3 bit) exponent. */ > #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ > > -static comp_t encode_comp_t(unsigned long value) > +static comp_t encode_comp_t(u64 value) > { > int exp, rnd; > > exp = rnd = 0; > while (value > MAXFRACT) { > - rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */ > - value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */ > + rnd = value & (1 << (EXPBASE - 1)); /* Round up? */ > + value >>= EXPBASE; /* Base 8 exponent == 3 bit shift. */ > exp++; > } > > @@ -248,16 +253,21 @@ > * If we need to round up, do it (and handle overflow correctly). > */ > if (rnd && (++value > MAXFRACT)) { > - value >>= EXPSIZE; > + value >>= EXPBASE; > exp++; > } > > /* > * Clean it up and polish it off. > */ > - exp <<= MANTSIZE; /* Shift the exponent into place */ > - exp += value; /* and add on the mantissa. */ > - return exp; > + if (exp >= (1 << EXPSIZE)) { > + /* Overflow. Return largest representable number instead. */ > + return (1ul << (MANTSIZE + EXPSIZE)) - 1; > + } else { > + exp <<= MANTSIZE; /* Shift the exponent into place */ > + exp += value; /* and add on the mantissa. */ > + return exp; > + } > } > > /* > @@ -278,6 +288,7 @@ > mm_segment_t fs; > unsigned long vsize; > unsigned long flim; > + u64 elapsed; > > /* > * First check to see if there is enough free_space to continue > @@ -295,8 +306,10 @@ > strncpy(ac.ac_comm, current->comm, ACCT_COMM); > ac.ac_comm[ACCT_COMM - 1] = '\0'; > > - ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ)); > - ac.ac_etime = encode_comp_t(jiffies - current->start_time); > + elapsed = get_jiffies64() - current->start_time; > + ac.ac_etime = encode_comp_t(elapsed); > + do_div(elapsed, HZ); > + ac.ac_btime = xtime.tv_sec - elapsed; > ac.ac_utime = encode_comp_t(current->times.tms_utime); > ac.ac_stime = encode_comp_t(current->times.tms_stime); > ac.ac_uid = current->uid; > > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- George george@mvista.com High-res-timers: http://sourceforge.net/projects/high-res-timers/ Real time sched: http://sourceforge.net/projects/rtsched/ - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/