2002-03-01 03:01:55

by Tim Schmielau

[permalink] [raw]
Subject: [patch] enable uptime display > 497 days on 32 bit (1/2)

Rediffed to 2.4.19-pre2, plus three micro-optimizations:

move jiffies_hi etc. to same cacheline as jiffies
(suggested by George Anzinger)
avoid turning off interrupts (suggested by Andreas Dilger)
use unlikely() (suggested by Andreas Dilger)

As no other comments turned up, this will go to Marcelo RSN.
(I wondered why no one vetoed this as overkill...)

Tim



--- linux-2.4.19-pre2/include/linux/sched.h Thu Feb 28 23:52:25 2002
+++ linux-2.4.19-pre2-j64/include/linux/sched.h Fri Mar 1 02:39:09 2002
@@ -361,7 +361,7 @@
unsigned long it_real_incr, it_prof_incr, it_virt_incr;
struct timer_list real_timer;
struct tms times;
- unsigned long start_time;
+ u64 start_time;
long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
@@ -573,6 +573,18 @@
#include <asm/current.h>

extern unsigned long volatile jiffies;
+#if BITS_PER_LONG < 48
+# define NEEDS_JIFFIES64
+ extern u64 get_jiffies64(void);
+#else
+ /* jiffies is wide enough to not wrap for 8716 years at HZ==1024 */
+ static inline u64 get_jiffies64(void)
+ {
+ return (u64)jiffies;
+ }
+#endif
+
+
extern unsigned long itimer_ticks;
extern unsigned long itimer_next;
extern struct timeval xtime;

--- linux-2.4.19-pre2/kernel/timer.c Mon Oct 8 19:41:41 2001
+++ linux-2.4.19-pre2-j64/kernel/timer.c Fri Mar 1 00:45:04 2002
@@ -66,6 +66,10 @@
extern int do_setitimer(int, struct itimerval *, struct itimerval *);

unsigned long volatile jiffies;
+#ifdef NEEDS_JIFFIES64
+ static unsigned long jiffies_hi, jiffies_last;
+ static spinlock_t jiffies64_lock = SPIN_LOCK_UNLOCKED;
+#endif

unsigned int * prof_buffer;
unsigned long prof_len;
@@ -103,6 +107,8 @@

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))

+static inline void init_jiffieswrap_timer(void);
+
void init_timervecs (void)
{
int i;
@@ -115,6 +121,8 @@
}
for (i = 0; i < TVR_SIZE; i++)
INIT_LIST_HEAD(tv1.vec + i);
+
+ init_jiffieswrap_timer();
}

static unsigned long timer_jiffies;
@@ -683,6 +691,61 @@
if (TQ_ACTIVE(tq_timer))
mark_bh(TQUEUE_BH);
}
+
+
+#ifdef NEEDS_JIFFIES64
+
+u64 get_jiffies64(void)
+{
+ unsigned long jiffies_tmp, jiffies_hi_tmp;
+
+ spin_lock(&jiffies64_lock);
+ jiffies_tmp = jiffies; /* avoid races */
+ jiffies_hi_tmp = jiffies_hi;
+ if (unlikely(jiffies_tmp < jiffies_last)) /* We have a wrap */
+ jiffies_hi++;
+ jiffies_last = jiffies_tmp;
+ spin_unlock(&jiffies64_lock);
+
+ return (jiffies_tmp | ((u64)jiffies_hi_tmp) << BITS_PER_LONG);
+}
+
+/* use a timer to periodically check for jiffies overflow */
+
+static struct timer_list jiffieswrap_timer;
+#define CHECK_JIFFIESWRAP_INTERVAL (1ul << (BITS_PER_LONG-2))
+
+static void check_jiffieswrap(unsigned long data)
+{
+ unsigned long jiffies_tmp;
+ mod_timer(&jiffieswrap_timer, jiffies + CHECK_JIFFIESWRAP_INTERVAL);
+
+ if (spin_trylock(&jiffies64_lock)) {
+ /* If we don't get the lock, we can just give up.
+ The current holder of the lock will check for wraps */
+ jiffies_tmp = jiffies; /* avoid races */
+ if (jiffies_tmp < jiffies_last) /* We have a wrap */
+ jiffies_hi++;
+ jiffies_last = jiffies_tmp;
+ spin_unlock(&jiffies64_lock);
+ } }
+
+static inline void init_jiffieswrap_timer(void)
+{
+ init_timer(&jiffieswrap_timer);
+ jiffieswrap_timer.expires = jiffies + CHECK_JIFFIESWRAP_INTERVAL;
+ jiffieswrap_timer.function = check_jiffieswrap;
+ add_timer(&jiffieswrap_timer);
+}
+
+#else
+
+static inline void init_jiffieswrap_timer(void)
+{
+}
+
+#endif /* NEEDS_JIFFIES64 */
+

#if !defined(__alpha__) && !defined(__ia64__)


--- linux-2.4.19-pre2/kernel/fork.c Sun Feb 24 19:20:43 2002
+++ linux-2.4.19-pre2-j64/kernel/fork.c Fri Mar 1 00:05:24 2002
@@ -657,7 +657,7 @@
}
#endif
p->lock_depth = -1; /* -1 = no lock */
- p->start_time = jiffies;
+ p->start_time = get_jiffies64();

INIT_LIST_HEAD(&p->local_pages);


--- linux-2.4.19-pre2/kernel/info.c Sat Apr 21 01:15:40 2001
+++ linux-2.4.19-pre2-j64/kernel/info.c Fri Mar 1 00:05:24 2002
@@ -12,15 +12,19 @@
#include <linux/smp_lock.h>

#include <asm/uaccess.h>
+#include <asm/div64.h>

asmlinkage long sys_sysinfo(struct sysinfo *info)
{
struct sysinfo val;
+ u64 uptime;

memset((char *)&val, 0, sizeof(struct sysinfo));

cli();
- val.uptime = jiffies / HZ;
+ uptime = get_jiffies64();
+ do_div(uptime, HZ);
+ val.uptime = (unsigned long) uptime;

val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);

--- linux-2.4.19-pre2/fs/proc/array.c Thu Oct 11 18:00:01 2001
+++ linux-2.4.19-pre2-j64/fs/proc/array.c Fri Mar 1 00:05:24 2002
@@ -343,7 +343,7 @@
ppid = task->pid ? task->p_opptr->pid : 0;
read_unlock(&tasklist_lock);
res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
-%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
+%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %llu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d\n",
task->pid,
task->comm,
@@ -366,7 +366,7 @@
nice,
0UL /* removed */,
task->it_real_value,
- task->start_time,
+ (unsigned long long)(task->start_time),
vsize,
mm ? mm->rss : 0, /* you might want to shift this left 3 */
task->rlim[RLIMIT_RSS].rlim_cur,

--- linux-2.4.19-pre2/fs/proc/proc_misc.c Wed Nov 21 06:29:09 2001
+++ linux-2.4.19-pre2-j64/fs/proc/proc_misc.c Fri Mar 1 01:32:28 2002
@@ -40,6 +40,7 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
+#include <asm/div64.h>


#define LOAD_INT(x) ((x) >> FSHIFT)
@@ -93,37 +94,93 @@
return proc_calc_metrics(page, start, off, count, eof, len);
}

+#if BITS_PER_LONG < 48
+static unsigned long idle_hi, idle_last;
+static spinlock_t idle64_lock = SPIN_LOCK_UNLOCKED;
+
+u64 get_idle64(void)
+{
+ unsigned long idle, idle_hi_tmp;
+
+ spin_lock(&idle64_lock);
+ idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
+ if (unlikely(idle < idle_last)) /* We have a wrap */
+ idle_hi++;
+ idle_last = idle;
+ idle_hi_tmp = idle_hi;
+ spin_unlock(&idle64_lock);
+
+ return (idle | ((u64)idle_hi_tmp) << BITS_PER_LONG);
+}
+
+/* use a timer to periodically check for idle time overflow */
+
+static struct timer_list idlewrap_timer;
+#define CHECK_IDLEWRAP_INTERVAL (1ul << (BITS_PER_LONG-2))
+
+static void check_idlewrap(unsigned long data)
+{
+ unsigned long idle;
+
+ mod_timer(&idlewrap_timer, jiffies + CHECK_IDLEWRAP_INTERVAL);
+ if (spin_trylock(&idle64_lock)) {
+ /* If we don't get the lock, we can just give up.
+ The current holder of the lock will check for wraps */
+ idle = init_tasks[0]->times.tms_utime
+ + init_tasks[0]->times.tms_stime;
+ if (idle < idle_last) /* We have a wrap */
+ idle_hi++;
+ idle_last = idle;
+ spin_unlock(&idle64_lock);
+ } }
+
+static inline void init_idlewrap_timer(void)
+{
+ init_timer(&idlewrap_timer);
+ idlewrap_timer.expires = jiffies + CHECK_IDLEWRAP_INTERVAL;
+ idlewrap_timer.function = check_idlewrap;
+ add_timer(&idlewrap_timer);
+}
+
+#else
+ /* Idle time won't overflow for 8716 years at HZ==1024 */
+
+static inline u64 get_idle64(void)
+{
+ return (u64)(init_tasks[0]->times.tms_utime
+ + init_tasks[0]->times.tms_stime);
+}
+
+static inline void init_idlewrap_timer(void)
+{
+}
+
+#endif /* BITS_PER_LONG < 48 */
+
static int uptime_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- unsigned long uptime;
- unsigned long idle;
+ u64 uptime, idle;
+ unsigned long uptime_remainder, idle_remainder;
int len;

- uptime = jiffies;
- idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
+ uptime = get_jiffies64();
+ uptime_remainder = (unsigned long) do_div(uptime, HZ);
+ idle = get_idle64();
+ idle_remainder = (unsigned long) do_div(idle, HZ);

- /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but
- that would overflow about every five days at HZ == 100.
- Therefore the identity a = (a / b) * b + a % b is used so that it is
- calculated as (((t / HZ) * 100) + ((t % HZ) * 100) / HZ) % 100.
- The part in front of the '+' always evaluates as 0 (mod 100). All divisions
- in the above formulas are truncating. For HZ being a power of 10, the
- calculations simplify to the version in the #else part (if the printf
- format is adapted to the same number of digits as zeroes in HZ.
- */
#if HZ!=100
len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
- uptime / HZ,
- (((uptime % HZ) * 100) / HZ) % 100,
- idle / HZ,
- (((idle % HZ) * 100) / HZ) % 100);
+ (unsigned long) uptime,
+ (uptime_remainder * 100) / HZ,
+ (unsigned long) idle,
+ (idle_remainder * 100) / HZ);
#else
len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
- uptime / HZ,
- uptime % HZ,
- idle / HZ,
- idle % HZ);
+ (unsigned long) uptime,
+ uptime_remainder,
+ (unsigned long) idle,
+ idle_remainder);
#endif
return proc_calc_metrics(page, start, off, count, eof, len);
}
@@ -240,7 +297,7 @@
{
int i, len;
extern unsigned long total_forks;
- unsigned long jif = jiffies;
+ u64 jif = get_jiffies64();
unsigned int sum = 0, user = 0, nice = 0, system = 0;
int major, disk;

@@ -256,17 +313,19 @@
#endif
}

- len = sprintf(page, "cpu %u %u %u %lu\n", user, nice, system,
- jif * smp_num_cpus - (user + nice + system));
+ len = sprintf(page, "cpu %u %u %u %llu\n", user, nice, system,
+ (unsigned long long) jif * smp_num_cpus
+ - user - nice - system);
for (i = 0 ; i < smp_num_cpus; i++)
- len += sprintf(page + len, "cpu%d %u %u %u %lu\n",
+ len += sprintf(page + len, "cpu%d %u %u %u %llu\n",
i,
kstat.per_cpu_user[cpu_logical_map(i)],
kstat.per_cpu_nice[cpu_logical_map(i)],
kstat.per_cpu_system[cpu_logical_map(i)],
- jif - ( kstat.per_cpu_user[cpu_logical_map(i)] \
- + kstat.per_cpu_nice[cpu_logical_map(i)] \
- + kstat.per_cpu_system[cpu_logical_map(i)]));
+ (unsigned long long) jif
+ - kstat.per_cpu_user[cpu_logical_map(i)]
+ - kstat.per_cpu_nice[cpu_logical_map(i)]
+ - kstat.per_cpu_system[cpu_logical_map(i)]);
len += sprintf(page + len,
"page %u %u\n"
"swap %u %u\n"
@@ -302,12 +361,13 @@
}
}

+ do_div(jif, HZ);
len += sprintf(page + len,
"\nctxt %u\n"
"btime %lu\n"
"processes %lu\n",
kstat.context_swtch,
- xtime.tv_sec - jif / HZ,
+ xtime.tv_sec - (unsigned long) jif,
total_forks);

return proc_calc_metrics(page, start, off, count, eof, len);
@@ -565,4 +625,6 @@
slabinfo_read_proc, NULL);
if (entry)
entry->write_proc = slabinfo_write_proc;
+
+ init_idlewrap_timer();
}

--- linux-2.4.19-pre2/mm/oom_kill.c Sun Nov 4 02:05:25 2001
+++ linux-2.4.19-pre2-j64/mm/oom_kill.c Fri Mar 1 00:05:24 2002
@@ -69,11 +69,10 @@
/*
* CPU time is in seconds and run time is in minutes. There is no
* particular reason for this other than that it turned out to work
- * very well in practice. This is not safe against jiffie wraps
- * but we don't care _that_ much...
+ * very well in practice.
*/
cpu_time = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3);
- run_time = (jiffies - p->start_time) >> (SHIFT_HZ + 10);
+ run_time = (get_jiffies64() - p->start_time) >> (SHIFT_HZ + 10);

points /= int_sqrt(cpu_time);
points /= int_sqrt(int_sqrt(run_time));

--- linux-2.4.19-pre2/kernel/acct.c Thu Feb 28 23:52:26 2002
+++ linux-2.4.19-pre2-j64/kernel/acct.c Fri Mar 1 00:06:51 2002
@@ -56,6 +56,7 @@
#include <linux/tty.h>

#include <asm/uaccess.h>
+#include <asm/div64.h>

/*
* These constants control the amount of freespace that suspend and
@@ -227,20 +228,24 @@
* This routine has been adopted from the encode_comp_t() function in
* the kern_acct.c file of the FreeBSD operating system. The encoding
* is a 13-bit fraction with a 3-bit (base 8) exponent.
+ *
+ * Bumped up to encode 64 bit values. Unfortunately the result may
+ * overflow now.
*/

#define MANTSIZE 13 /* 13 bit mantissa. */
-#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
+#define EXPSIZE 3 /* 3 bit exponent. */
+#define EXPBASE 3 /* Base 8 (3 bit) exponent. */
#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */

-static comp_t encode_comp_t(unsigned long value)
+static comp_t encode_comp_t(u64 value)
{
int exp, rnd;

exp = rnd = 0;
while (value > MAXFRACT) {
- rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
- value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
+ rnd = value & (1 << (EXPBASE - 1)); /* Round up? */
+ value >>= EXPBASE; /* Base 8 exponent == 3 bit shift. */
exp++;
}

@@ -248,16 +253,21 @@
* If we need to round up, do it (and handle overflow correctly).
*/
if (rnd && (++value > MAXFRACT)) {
- value >>= EXPSIZE;
+ value >>= EXPBASE;
exp++;
}

/*
* Clean it up and polish it off.
*/
- exp <<= MANTSIZE; /* Shift the exponent into place */
- exp += value; /* and add on the mantissa. */
- return exp;
+ if (exp >= (1 << EXPSIZE)) {
+ /* Overflow. Return largest representable number instead. */
+ return (1ul << (MANTSIZE + EXPSIZE)) - 1;
+ } else {
+ exp <<= MANTSIZE; /* Shift the exponent into place */
+ exp += value; /* and add on the mantissa. */
+ return exp;
+ }
}

/*
@@ -278,6 +288,7 @@
mm_segment_t fs;
unsigned long vsize;
unsigned long flim;
+ u64 elapsed;

/*
* First check to see if there is enough free_space to continue
@@ -295,8 +306,10 @@
strncpy(ac.ac_comm, current->comm, ACCT_COMM);
ac.ac_comm[ACCT_COMM - 1] = '\0';

- ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
- ac.ac_etime = encode_comp_t(jiffies - current->start_time);
+ elapsed = get_jiffies64() - current->start_time;
+ ac.ac_etime = encode_comp_t(elapsed);
+ do_div(elapsed, HZ);
+ ac.ac_btime = xtime.tv_sec - elapsed;
ac.ac_utime = encode_comp_t(current->times.tms_utime);
ac.ac_stime = encode_comp_t(current->times.tms_stime);
ac.ac_uid = current->uid;


2002-03-01 03:07:50

by Tim Schmielau

[permalink] [raw]
Subject: [patch] enable uptime display > 497 days on 32 bit (2/2)

Debug option that moves the jiffies wraparound forward to five minutes after boot.
Rediffed to 2.4.19-pre2, and added a printk to make the option visible
in dmesg (suggested by Andreas Dilger).

Tim


--- linux-2.4.19-pre2-j64/include/linux/timex.h Fri Mar 1 01:01:05 2002
+++ linux-2.4.19-pre2-j64-dbg/include/linux/timex.h Fri Mar 1 02:39:09 2002
@@ -53,6 +53,13 @@

#include <asm/param.h>

+#ifdef CONFIG_DEBUG_JIFFIESWRAP
+ /* Make the jiffies counter wrap around sooner. */
+# define INITIAL_JIFFIES ((unsigned long)(-300*HZ))
+#else
+# define INITIAL_JIFFIES 0
+#endif
+
/*
* The following defines establish the engineering parameters of the PLL
* model. The HZ variable establishes the timer interrupt frequency, 100 Hz

--- linux-2.4.19-pre2-j64/kernel/timer.c Fri Mar 1 00:45:04 2002
+++ linux-2.4.19-pre2-j64-dbg/kernel/timer.c Fri Mar 1 03:00:07 2002
@@ -65,9 +65,9 @@

extern int do_setitimer(int, struct itimerval *, struct itimerval *);

-unsigned long volatile jiffies;
+unsigned long volatile jiffies = INITIAL_JIFFIES;
#ifdef NEEDS_JIFFIES64
- static unsigned long jiffies_hi, jiffies_last;
+ static unsigned long jiffies_hi, jiffies_last = INITIAL_JIFFIES;
static spinlock_t jiffies64_lock = SPIN_LOCK_UNLOCKED;
#endif

@@ -122,10 +122,21 @@
for (i = 0; i < TVR_SIZE; i++)
INIT_LIST_HEAD(tv1.vec + i);

+#ifdef CONFIG_DEBUG_JIFFIESWRAP
+ tv1.index = INITIAL_JIFFIES & TVR_MASK;
+ tv2.index = (INITIAL_JIFFIES >> TVR_BITS) & TVN_MASK;
+ tv3.index = (INITIAL_JIFFIES >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ tv4.index = (INITIAL_JIFFIES >> (TVR_BITS + 2*TVN_BITS)) & TVN_MASK;
+ tv5.index = (INITIAL_JIFFIES >> (TVR_BITS + 3*TVN_BITS)) & TVN_MASK;
+
+ printk(KERN_NOTICE "Set up jiffies counter to wrap in %ld seconds.\n",
+ (-(long)jiffies)/HZ);
+#endif
+
init_jiffieswrap_timer();
}

-static unsigned long timer_jiffies;
+static unsigned long timer_jiffies = INITIAL_JIFFIES;

static inline void internal_add_timer(struct timer_list *timer)
{
@@ -646,7 +657,7 @@
}

/* jiffies at the most recent update of wall time */
-unsigned long wall_jiffies;
+unsigned long wall_jiffies = INITIAL_JIFFIES;

/*
* This spinlock protect us from races in SMP while playing with xtime. -arca

--- linux-2.4.19-pre2-j64/fs/proc/array.c Fri Mar 1 00:05:24 2002
+++ linux-2.4.19-pre2-j64-dbg/fs/proc/array.c Fri Mar 1 01:38:50 2002
@@ -366,7 +366,7 @@
nice,
0UL /* removed */,
task->it_real_value,
- (unsigned long long)(task->start_time),
+ (unsigned long long)(task->start_time) - INITIAL_JIFFIES,
vsize,
mm ? mm->rss : 0, /* you might want to shift this left 3 */
task->rlim[RLIMIT_RSS].rlim_cur,

--- linux-2.4.19-pre2-j64/fs/proc/proc_misc.c Fri Mar 1 01:32:28 2002
+++ linux-2.4.19-pre2-j64-dbg/fs/proc/proc_misc.c Fri Mar 1 01:38:50 2002
@@ -164,7 +164,7 @@
unsigned long uptime_remainder, idle_remainder;
int len;

- uptime = get_jiffies64();
+ uptime = get_jiffies64() - INITIAL_JIFFIES;
uptime_remainder = (unsigned long) do_div(uptime, HZ);
idle = get_idle64();
idle_remainder = (unsigned long) do_div(idle, HZ);
@@ -297,7 +297,7 @@
{
int i, len;
extern unsigned long total_forks;
- u64 jif = get_jiffies64();
+ u64 jif = get_jiffies64() - INITIAL_JIFFIES;
unsigned int sum = 0, user = 0, nice = 0, system = 0;
int major, disk;


--- linux-2.4.19-pre2-j64/kernel/info.c Fri Mar 1 00:05:24 2002
+++ linux-2.4.19-pre2-j64-dbg/kernel/info.c Fri Mar 1 01:38:50 2002
@@ -22,7 +22,7 @@
memset((char *)&val, 0, sizeof(struct sysinfo));

cli();
- uptime = get_jiffies64();
+ uptime = get_jiffies64() - INITIAL_JIFFIES;
do_div(uptime, HZ);
val.uptime = (unsigned long) uptime;


--- linux-2.4.19-pre2-j64/Documentation/Configure.help Thu Feb 28 23:52:03 2002
+++ linux-2.4.19-pre2-j64-dbg/Documentation/Configure.help Fri Mar 1 01:38:50 2002
@@ -24081,6 +24081,14 @@
of the BUG call as well as the EIP and oops trace. This aids
debugging but costs about 70-100K of memory.

+Debug jiffies counter wraparound (DANGEROUS)
+CONFIG_DEBUG_JIFFIESWRAP
+ Say Y here to initialize the jiffies counter to a value 5 minutes
+ before wraparound. This may make your system UNSTABLE and its
+ only use is to hunt down the causes of this instability.
+ If you don't know what the jiffies counter is or if you want
+ a stable system, say N.
+
Include kgdb kernel debugger
CONFIG_KGDB
Include in-kernel hooks for kgdb, the Linux kernel source level

--- linux-2.4.19-pre2-j64/arch/arm/config.in Fri Nov 9 22:58:02 2001
+++ linux-2.4.19-pre2-j64-dbg/arch/arm/config.in Fri Mar 1 01:38:50 2002
@@ -601,6 +601,7 @@
bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ
bool 'Spinlock debugging' CONFIG_DEBUG_SPINLOCK
dep_bool 'Disable pgtable cache' CONFIG_NO_PGT_CACHE $CONFIG_CPU_26
+bool 'Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
# These options are only for real kernel hackers who want to get their hands dirty.
dep_bool 'Kernel low-level debugging functions' CONFIG_DEBUG_LL $CONFIG_EXPERIMENTAL
dep_bool ' Kernel low-level debugging messages via footbridge serial port' CONFIG_DEBUG_DC21285_PORT $CONFIG_DEBUG_LL $CONFIG_FOOTBRIDGE

--- linux-2.4.19-pre2-j64/arch/cris/config.in Sun Feb 24 19:20:36 2002
+++ linux-2.4.19-pre2-j64-dbg/arch/cris/config.in Fri Mar 1 01:38:50 2002
@@ -253,4 +253,5 @@
if [ "$CONFIG_PROFILE" = "y" ]; then
int ' Profile shift count' CONFIG_PROFILE_SHIFT 2
fi
+bool 'Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
endmenu

--- linux-2.4.19-pre2-j64/arch/i386/config.in Thu Feb 28 23:52:04 2002
+++ linux-2.4.19-pre2-j64-dbg/arch/i386/config.in Fri Mar 1 01:38:50 2002
@@ -425,6 +425,7 @@
bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ
bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK
bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE
+ bool ' Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
fi

endmenu

--- linux-2.4.19-pre2-j64/arch/m68k/config.in Tue Jun 12 04:15:27 2001
+++ linux-2.4.19-pre2-j64-dbg/arch/m68k/config.in Fri Mar 1 01:38:50 2002
@@ -545,4 +545,5 @@

#bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC
bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ
+bool 'Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
endmenu

--- linux-2.4.19-pre2-j64/arch/mips/config.in Thu Feb 28 23:52:05 2002
+++ linux-2.4.19-pre2-j64-dbg/arch/mips/config.in Fri Mar 1 01:38:50 2002
@@ -643,4 +643,5 @@
if [ "$CONFIG_SMP" != "y" ]; then
bool 'Run uncached' CONFIG_MIPS_UNCACHED
fi
+bool 'Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
endmenu

--- linux-2.4.19-pre2-j64/arch/parisc/config.in Wed Apr 18 02:19:25 2001
+++ linux-2.4.19-pre2-j64-dbg/arch/parisc/config.in Fri Mar 1 01:38:50 2002
@@ -206,5 +206,6 @@

#bool 'Debug kmalloc/kfree' CONFIG_DEBUG_MALLOC
bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ
+bool 'Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
endmenu


--- linux-2.4.19-pre2-j64/arch/ppc/config.in Thu Feb 28 23:52:07 2002
+++ linux-2.4.19-pre2-j64-dbg/arch/ppc/config.in Fri Mar 1 01:38:50 2002
@@ -402,4 +402,5 @@
bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ
bool 'Include kgdb kernel debugger' CONFIG_KGDB
bool 'Include xmon kernel debugger' CONFIG_XMON
+bool 'Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
endmenu

--- linux-2.4.19-pre2-j64/arch/sh/config.in Sun Feb 24 19:20:37 2002
+++ linux-2.4.19-pre2-j64-dbg/arch/sh/config.in Fri Mar 1 01:38:50 2002
@@ -385,4 +385,5 @@
if [ "$CONFIG_SH_STANDARD_BIOS" = "y" ]; then
bool 'Early printk support' CONFIG_SH_EARLY_PRINTK
fi
+bool 'Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
endmenu

--- linux-2.4.19-pre2-j64/arch/sparc/config.in Tue Jun 12 04:15:27 2001
+++ linux-2.4.19-pre2-j64-dbg/arch/sparc/config.in Fri Mar 1 01:38:50 2002
@@ -265,4 +265,5 @@
comment 'Kernel hacking'

bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ
+bool 'Debug jiffies counter wraparound (DANGEROUS)' CONFIG_DEBUG_JIFFIESWRAP
endmenu

2002-03-01 09:11:16

by George Anzinger

[permalink] [raw]
Subject: Re: [patch] enable uptime display > 497 days on 32 bit (1/2)

Tim Schmielau wrote:
>
> rediffed to 2.4.19-pre2 and three micro-optimizations:
>
> move jiffies_hi etc. to same cacheline as jiffies
> (suggested by George Anzinger)
> avoid turning off interrupts (suggested by Andreas Dilger)
> use unlikely() (suggested by Andreas Dilger)
>
> As no other comments turned up, this will go to Marcelo RSN.
> (wondered why noone vetoed this as overkill...)

I figure that is Marcelo's job :)
>
> Tim
>
> --- linux-2.4.19-pre2/include/linux/sched.h Thu Feb 28 23:52:25 2002
> +++ linux-2.4.19-pre2-j64/include/linux/sched.h Fri Mar 1 02:39:09 2002
> @@ -361,7 +361,7 @@
> unsigned long it_real_incr, it_prof_incr, it_virt_incr;
> struct timer_list real_timer;
> struct tms times;
> - unsigned long start_time;
> + u64 start_time;
> long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
> /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
> unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
> @@ -573,6 +573,18 @@
> #include <asm/current.h>
>
> extern unsigned long volatile jiffies;
> +#if BITS_PER_LONG < 48
> +# define NEEDS_JIFFIES64
> + extern u64 get_jiffies64(void);
> +#else
> + /* jiffies is wide enough to not wrap for 8716 years at HZ==1024 */
> + static inline u64 get_jiffies64(void)
> + {
> + return (u64)jiffies;
> + }
> +#endif
> +
> +
> extern unsigned long itimer_ticks;
> extern unsigned long itimer_next;
> extern struct timeval xtime;
>
> --- linux-2.4.19-pre2/kernel/timer.c Mon Oct 8 19:41:41 2001
> +++ linux-2.4.19-pre2-j64/kernel/timer.c Fri Mar 1 00:45:04 2002
> @@ -66,6 +66,10 @@
> extern int do_setitimer(int, struct itimerval *, struct itimerval *);
>
> unsigned long volatile jiffies;
> +#ifdef NEEDS_JIFFIES64
> + static unsigned long jiffies_hi, jiffies_last;
> + static spinlock_t jiffies64_lock = SPIN_LOCK_UNLOCKED;
> +#endif
>
> unsigned int * prof_buffer;
> unsigned long prof_len;
> @@ -103,6 +107,8 @@
>
> #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
>
> +static inline void init_jiffieswrap_timer(void);
> +
> void init_timervecs (void)
> {
> int i;
> @@ -115,6 +121,8 @@
> }
> for (i = 0; i < TVR_SIZE; i++)
> INIT_LIST_HEAD(tv1.vec + i);
> +
> + init_jiffieswrap_timer();
> }
>
> static unsigned long timer_jiffies;
> @@ -683,6 +691,61 @@
> if (TQ_ACTIVE(tq_timer))
> mark_bh(TQUEUE_BH);
> }
> +
> +
> +#ifdef NEEDS_JIFFIES64
> +
> +u64 get_jiffies64(void)
> +{
> + unsigned long jiffies_tmp, jiffies_hi_tmp;
> +
> + spin_lock(&jiffies64_lock);
> + jiffies_tmp = jiffies; /* avoid races */
> + jiffies_hi_tmp = jiffies_hi;
> + if (unlikely(jiffies_tmp < jiffies_last)) /* We have a wrap */
> + jiffies_hi++;
> + jiffies_last = jiffies_tmp;
> + spin_unlock(&jiffies64_lock);
> +
> + return (jiffies_tmp | ((u64)jiffies_hi_tmp) << BITS_PER_LONG);
> +}
> +
> +/* use a timer to periodically check for jiffies overflow */
> +
> +static struct timer_list jiffieswrap_timer;
> +#define CHECK_JIFFIESWRAP_INTERVAL (1ul << (BITS_PER_LONG-2))
> +
> +static void check_jiffieswrap(unsigned long data)
> +{
> + unsigned long jiffies_tmp;
> + mod_timer(&jiffieswrap_timer, jiffies + CHECK_JIFFIESWRAP_INTERVAL);
> +
> + if (spin_trylock(&jiffies64_lock)) {
> + /* If we don't get the lock, we can just give up.
> + The current holder of the lock will check for wraps */
> + jiffies_tmp = jiffies; /* avoid races */
> + if (jiffies_tmp < jiffies_last) /* We have a wrap */
> + jiffies_hi++;
> + jiffies_last = jiffies_tmp;
> + spin_unlock(&jiffies64_lock);
> + } }
> +
> +static inline void init_jiffieswrap_timer(void)
> +{
> + init_timer(&jiffieswrap_timer);
> + jiffieswrap_timer.expires = jiffies + CHECK_JIFFIESWRAP_INTERVAL;
> + jiffieswrap_timer.function = check_jiffieswrap;
> + add_timer(&jiffieswrap_timer);
> +}
> +
> +#else
> +
> +static inline void init_jiffieswrap_timer(void)
> +{
> +}
> +
> +#endif /* NEEDS_JIFFIES64 */
> +
>
> #if !defined(__alpha__) && !defined(__ia64__)
>
>
> --- linux-2.4.19-pre2/kernel/fork.c Sun Feb 24 19:20:43 2002
> +++ linux-2.4.19-pre2-j64/kernel/fork.c Fri Mar 1 00:05:24 2002
> @@ -657,7 +657,7 @@
> }
> #endif
> p->lock_depth = -1; /* -1 = no lock */
> - p->start_time = jiffies;
> + p->start_time = get_jiffies64();
>
> INIT_LIST_HEAD(&p->local_pages);
>
>
> --- linux-2.4.19-pre2/kernel/info.c Sat Apr 21 01:15:40 2001
> +++ linux-2.4.19-pre2-j64/kernel/info.c Fri Mar 1 00:05:24 2002
> @@ -12,15 +12,19 @@
> #include <linux/smp_lock.h>
>
> #include <asm/uaccess.h>
> +#include <asm/div64.h>
>
> asmlinkage long sys_sysinfo(struct sysinfo *info)
> {
> struct sysinfo val;
> + u64 uptime;
>
> memset((char *)&val, 0, sizeof(struct sysinfo));
>
> cli();
> - val.uptime = jiffies / HZ;
> + uptime = get_jiffies64();
> + do_div(uptime, HZ);
> + val.uptime = (unsigned long) uptime;
>
> val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
> val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
>
> --- linux-2.4.19-pre2/fs/proc/array.c Thu Oct 11 18:00:01 2001
> +++ linux-2.4.19-pre2-j64/fs/proc/array.c Fri Mar 1 00:05:24 2002
> @@ -343,7 +343,7 @@
> ppid = task->pid ? task->p_opptr->pid : 0;
> read_unlock(&tasklist_lock);
> res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
> -%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
> +%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %llu %lu %ld %lu %lu %lu %lu %lu \
> %lu %lu %lu %lu %lu %lu %lu %lu %d %d\n",
> task->pid,
> task->comm,
> @@ -366,7 +366,7 @@
> nice,
> 0UL /* removed */,
> task->it_real_value,
> - task->start_time,
> + (unsigned long long)(task->start_time),
> vsize,
> mm ? mm->rss : 0, /* you might want to shift this left 3 */
> task->rlim[RLIMIT_RSS].rlim_cur,
>
> --- linux-2.4.19-pre2/fs/proc/proc_misc.c Wed Nov 21 06:29:09 2001
> +++ linux-2.4.19-pre2-j64/fs/proc/proc_misc.c Fri Mar 1 01:32:28 2002
> @@ -40,6 +40,7 @@
> #include <asm/uaccess.h>
> #include <asm/pgtable.h>
> #include <asm/io.h>
> +#include <asm/div64.h>
>
>
> #define LOAD_INT(x) ((x) >> FSHIFT)
> @@ -93,37 +94,93 @@
> return proc_calc_metrics(page, start, off, count, eof, len);
> }
>
> +#if BITS_PER_LONG < 48
> +static unsigned long idle_hi, idle_last;
> +static spinlock_t idle64_lock = SPIN_LOCK_UNLOCKED;
> +
> +u64 get_idle64(void)
> +{
> + unsigned long idle, idle_hi_tmp;
> +
> + spin_lock(&idle64_lock);
> + idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
> + if (unlikely(idle < idle_last)) /* We have a wrap */
> + idle_hi++;
> + idle_last = idle;
> + idle_hi_tmp = idle_hi;
> + spin_unlock(&idle64_lock);
> +
> + return (idle | ((u64)idle_hi_tmp) << BITS_PER_LONG);
> +}
> +
> +/* use a timer to periodically check for idle time overflow */
> +
> +static struct timer_list idlewrap_timer;
> +#define CHECK_IDLEWRAP_INTERVAL (1ul << (BITS_PER_LONG-2))
> +
> +static void check_idlewrap(unsigned long data)
> +{
> + unsigned long idle;
> +
> + mod_timer(&idlewrap_timer, jiffies + CHECK_IDLEWRAP_INTERVAL);
> + if (spin_trylock(&idle64_lock)) {
> + /* If we don't get the lock, we can just give up.
> + The current holder of the lock will check for wraps */
> + idle = init_tasks[0]->times.tms_utime
> + + init_tasks[0]->times.tms_stime;
> + if (idle < idle_last) /* We have a wrap */
> + idle_hi++;
> + idle_last = idle;
> + spin_unlock(&idle64_lock);
> + } }
> +
> +static inline void init_idlewrap_timer(void)
> +{
> + init_timer(&idlewrap_timer);
> + idlewrap_timer.expires = jiffies + CHECK_IDLEWRAP_INTERVAL;
> + idlewrap_timer.function = check_idlewrap;
> + add_timer(&idlewrap_timer);
> +}
> +
> +#else
> + /* Idle time won't overflow for 8716 years at HZ==1024 */
> +
> +static inline u64 get_idle64(void)
> +{
> + return (u64)(init_tasks[0]->times.tms_utime
> + + init_tasks[0]->times.tms_stime);
> +}
> +
> +static inline void init_idlewrap_timer(void)
> +{
> +}
> +
> +#endif /* BITS_PER_LONG < 48 */
> +
> static int uptime_read_proc(char *page, char **start, off_t off,
> int count, int *eof, void *data)
> {
> - unsigned long uptime;
> - unsigned long idle;
> + u64 uptime, idle;
> + unsigned long uptime_remainder, idle_remainder;
> int len;
>
> - uptime = jiffies;
> - idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
> + uptime = get_jiffies64();
> + uptime_remainder = (unsigned long) do_div(uptime, HZ);
> + idle = get_idle64();
> + idle_remainder = (unsigned long) do_div(idle, HZ);
>
> - /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but
> - that would overflow about every five days at HZ == 100.
> - Therefore the identity a = (a / b) * b + a % b is used so that it is
> - calculated as (((t / HZ) * 100) + ((t % HZ) * 100) / HZ) % 100.
> - The part in front of the '+' always evaluates as 0 (mod 100). All divisions
> - in the above formulas are truncating. For HZ being a power of 10, the
> - calculations simplify to the version in the #else part (if the printf
> - format is adapted to the same number of digits as zeroes in HZ.
> - */
> #if HZ!=100
> len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
> - uptime / HZ,
> - (((uptime % HZ) * 100) / HZ) % 100,
> - idle / HZ,
> - (((idle % HZ) * 100) / HZ) % 100);
> + (unsigned long) uptime,
> + (uptime_remainder * 100) / HZ,
> + (unsigned long) idle,
> + (idle_remainder * 100) / HZ);
> #else
> len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
> - uptime / HZ,
> - uptime % HZ,
> - idle / HZ,
> - idle % HZ);
> + (unsigned long) uptime,
> + uptime_remainder,
> + (unsigned long) idle,
> + idle_remainder);
> #endif
> return proc_calc_metrics(page, start, off, count, eof, len);
> }
> @@ -240,7 +297,7 @@
> {
> int i, len;
> extern unsigned long total_forks;
> - unsigned long jif = jiffies;
> + u64 jif = get_jiffies64();
> unsigned int sum = 0, user = 0, nice = 0, system = 0;
> int major, disk;
>
> @@ -256,17 +313,19 @@
> #endif
> }
>
> - len = sprintf(page, "cpu %u %u %u %lu\n", user, nice, system,
> - jif * smp_num_cpus - (user + nice + system));
> + len = sprintf(page, "cpu %u %u %u %llu\n", user, nice, system,
> + (unsigned long long) jif * smp_num_cpus
> + - user - nice - system);
> for (i = 0 ; i < smp_num_cpus; i++)
> - len += sprintf(page + len, "cpu%d %u %u %u %lu\n",
> + len += sprintf(page + len, "cpu%d %u %u %u %llu\n",
> i,
> kstat.per_cpu_user[cpu_logical_map(i)],
> kstat.per_cpu_nice[cpu_logical_map(i)],
> kstat.per_cpu_system[cpu_logical_map(i)],
> - jif - ( kstat.per_cpu_user[cpu_logical_map(i)] \
> - + kstat.per_cpu_nice[cpu_logical_map(i)] \
> - + kstat.per_cpu_system[cpu_logical_map(i)]));
> + (unsigned long long) jif
> + - kstat.per_cpu_user[cpu_logical_map(i)]
> + - kstat.per_cpu_nice[cpu_logical_map(i)]
> + - kstat.per_cpu_system[cpu_logical_map(i)]);
> len += sprintf(page + len,
> "page %u %u\n"
> "swap %u %u\n"
> @@ -302,12 +361,13 @@
> }
> }
>
> + do_div(jif, HZ);
> len += sprintf(page + len,
> "\nctxt %u\n"
> "btime %lu\n"
> "processes %lu\n",
> kstat.context_swtch,
> - xtime.tv_sec - jif / HZ,
> + xtime.tv_sec - (unsigned long) jif,
> total_forks);
>
> return proc_calc_metrics(page, start, off, count, eof, len);
> @@ -565,4 +625,6 @@
> slabinfo_read_proc, NULL);
> if (entry)
> entry->write_proc = slabinfo_write_proc;
> +
> + init_idlewrap_timer();
> }
>
> --- linux-2.4.19-pre2/mm/oom_kill.c Sun Nov 4 02:05:25 2001
> +++ linux-2.4.19-pre2-j64/mm/oom_kill.c Fri Mar 1 00:05:24 2002
> @@ -69,11 +69,10 @@
> /*
> * CPU time is in seconds and run time is in minutes. There is no
> * particular reason for this other than that it turned out to work
> - * very well in practice. This is not safe against jiffie wraps
> - * but we don't care _that_ much...
> + * very well in practice.
> */
> cpu_time = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3);
> - run_time = (jiffies - p->start_time) >> (SHIFT_HZ + 10);
> + run_time = (get_jiffies64() - p->start_time) >> (SHIFT_HZ + 10);
>
> points /= int_sqrt(cpu_time);
> points /= int_sqrt(int_sqrt(run_time));
>
> --- linux-2.4.19-pre2/kernel/acct.c Thu Feb 28 23:52:26 2002
> +++ linux-2.4.19-pre2-j64/kernel/acct.c Fri Mar 1 00:06:51 2002
> @@ -56,6 +56,7 @@
> #include <linux/tty.h>
>
> #include <asm/uaccess.h>
> +#include <asm/div64.h>
>
> /*
> * These constants control the amount of freespace that suspend and
> @@ -227,20 +228,24 @@
> * This routine has been adopted from the encode_comp_t() function in
> * the kern_acct.c file of the FreeBSD operating system. The encoding
> * is a 13-bit fraction with a 3-bit (base 8) exponent.
> + *
> + * Bumped up to encode 64 bit values. Unfortunately the result may
> + * overflow now.
> */
>
> #define MANTSIZE 13 /* 13 bit mantissa. */
> -#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
> +#define EXPSIZE 3 /* 3 bit exponent. */
> +#define EXPBASE 3 /* Base 8 (3 bit) exponent. */
> #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */
>
> -static comp_t encode_comp_t(unsigned long value)
> +static comp_t encode_comp_t(u64 value)
> {
> int exp, rnd;
>
> exp = rnd = 0;
> while (value > MAXFRACT) {
> - rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
> - value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
> + rnd = value & (1 << (EXPBASE - 1)); /* Round up? */
> + value >>= EXPBASE; /* Base 8 exponent == 3 bit shift. */
> exp++;
> }
>
> @@ -248,16 +253,21 @@
> * If we need to round up, do it (and handle overflow correctly).
> */
> if (rnd && (++value > MAXFRACT)) {
> - value >>= EXPSIZE;
> + value >>= EXPBASE;
> exp++;
> }
>
> /*
> * Clean it up and polish it off.
> */
> - exp <<= MANTSIZE; /* Shift the exponent into place */
> - exp += value; /* and add on the mantissa. */
> - return exp;
> + if (exp >= (1 << EXPSIZE)) {
> + /* Overflow. Return largest representable number instead. */
> + return (1ul << (MANTSIZE + EXPSIZE)) - 1;
> + } else {
> + exp <<= MANTSIZE; /* Shift the exponent into place */
> + exp += value; /* and add on the mantissa. */
> + return exp;
> + }
> }
>
> /*
> @@ -278,6 +288,7 @@
> mm_segment_t fs;
> unsigned long vsize;
> unsigned long flim;
> + u64 elapsed;
>
> /*
> * First check to see if there is enough free_space to continue
> @@ -295,8 +306,10 @@
> strncpy(ac.ac_comm, current->comm, ACCT_COMM);
> ac.ac_comm[ACCT_COMM - 1] = '\0';
>
> - ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
> - ac.ac_etime = encode_comp_t(jiffies - current->start_time);
> + elapsed = get_jiffies64() - current->start_time;
> + ac.ac_etime = encode_comp_t(elapsed);
> + do_div(elapsed, HZ);
> + ac.ac_btime = xtime.tv_sec - elapsed;
> ac.ac_utime = encode_comp_t(current->times.tms_utime);
> ac.ac_stime = encode_comp_t(current->times.tms_stime);
> ac.ac_uid = current->uid;
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
George [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Real time sched: http://sourceforge.net/projects/rtsched/

2002-03-01 12:53:52

by Rik van Riel

[permalink] [raw]
Subject: Re: [patch] enable uptime display > 497 days on 32 bit (1/2)

On Fri, 1 Mar 2002, Tim Schmielau wrote:

> rediffed to 2.4.19-pre2 and three micro-optimizations:
>
> move jiffies_hi etc. to same cacheline as jiffies
> (suggested by George Anzinger)
> avoid turning off interrupts (suggested by Andreas Dilger)
> use unlikely() (suggested by Andreas Dilger)
>
> As no other comments turned up, this will go to Marcelo RSN.

Please merge with Linus first, otherwise you'll lose this
feature once you upgrade to 2.6 ...

> (wondered why noone vetoed this as overkill...)

I guess sneaking it past Linus will be the real test ;)

cheers,

Rik
--
"Linux holds advantages over the single-vendor commercial OS"
-- Microsoft's "Competing with Linux" document

http://www.surriel.com/ http://distro.conectiva.com/


2002-03-01 15:49:34

by George Anzinger

[permalink] [raw]
Subject: Re: [patch] enable uptime display > 497 days on 32 bit (1/2)

Tim Schmielau wrote:
>
> rediffed to 2.4.19-pre2 and three micro-optimizations:
>
> move jiffies_hi etc. to same cacheline as jiffies
> (suggested by George Anzinger)
> avoid turning off interrupts (suggested by Andreas Dilger)
> use unlikely() (suggested by Andreas Dilger)
>
> As no other comments turned up, this will go to Marcelo RSN.
> (wondered why noone vetoed this as overkill...)

I figure that is Marcelo's job :)
>
> Tim
>
> --- linux-2.4.19-pre2/include/linux/sched.h Thu Feb 28 23:52:25 2002
> +++ linux-2.4.19-pre2-j64/include/linux/sched.h Fri Mar 1 02:39:09 2002
> @@ -361,7 +361,7 @@
> unsigned long it_real_incr, it_prof_incr, it_virt_incr;
> struct timer_list real_timer;
> struct tms times;
> - unsigned long start_time;
> + u64 start_time;
> long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
> /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
> unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
> @@ -573,6 +573,18 @@
> #include <asm/current.h>
>
> extern unsigned long volatile jiffies;
> +#if BITS_PER_LONG < 48
> +# define NEEDS_JIFFIES64
> + extern u64 get_jiffies64(void);
> +#else
> + /* jiffies is wide enough to not wrap for 8716 years at HZ==1024 */
> + static inline u64 get_jiffies64(void)
> + {
> + return (u64)jiffies;
> + }
> +#endif
> +
> +
> extern unsigned long itimer_ticks;
> extern unsigned long itimer_next;
> extern struct timeval xtime;
>
> --- linux-2.4.19-pre2/kernel/timer.c Mon Oct 8 19:41:41 2001
> +++ linux-2.4.19-pre2-j64/kernel/timer.c Fri Mar 1 00:45:04 2002
> @@ -66,6 +66,10 @@
> extern int do_setitimer(int, struct itimerval *, struct itimerval *);
>
> unsigned long volatile jiffies;
> +#ifdef NEEDS_JIFFIES64
> + static unsigned long jiffies_hi, jiffies_last;
> + static spinlock_t jiffies64_lock = SPIN_LOCK_UNLOCKED;
> +#endif
>
> unsigned int * prof_buffer;
> unsigned long prof_len;
> @@ -103,6 +107,8 @@
>
> #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
>
> +static inline void init_jiffieswrap_timer(void);
> +
> void init_timervecs (void)
> {
> int i;
> @@ -115,6 +121,8 @@
> }
> for (i = 0; i < TVR_SIZE; i++)
> INIT_LIST_HEAD(tv1.vec + i);
> +
> + init_jiffieswrap_timer();
> }
>
> static unsigned long timer_jiffies;
> @@ -683,6 +691,61 @@
> if (TQ_ACTIVE(tq_timer))
> mark_bh(TQUEUE_BH);
> }
> +
> +
> +#ifdef NEEDS_JIFFIES64
> +
> +u64 get_jiffies64(void)
> +{
> + unsigned long jiffies_tmp, jiffies_hi_tmp;
> +
> + spin_lock(&jiffies64_lock);
> + jiffies_tmp = jiffies; /* avoid races */
> + jiffies_hi_tmp = jiffies_hi;
> + if (unlikely(jiffies_tmp < jiffies_last)) /* We have a wrap */
> + jiffies_hi++;
> + jiffies_last = jiffies_tmp;
> + spin_unlock(&jiffies64_lock);
> +
> + return (jiffies_tmp | ((u64)jiffies_hi_tmp) << BITS_PER_LONG);
> +}
> +
> +/* use a timer to periodically check for jiffies overflow */
> +
> +static struct timer_list jiffieswrap_timer;
> +#define CHECK_JIFFIESWRAP_INTERVAL (1ul << (BITS_PER_LONG-2))
> +
> +static void check_jiffieswrap(unsigned long data)
> +{
> + unsigned long jiffies_tmp;
> + mod_timer(&jiffieswrap_timer, jiffies + CHECK_JIFFIESWRAP_INTERVAL);
> +
> + if (spin_trylock(&jiffies64_lock)) {
> + /* If we don't get the lock, we can just give up.
> + The current holder of the lock will check for wraps */
> + jiffies_tmp = jiffies; /* avoid races */
> + if (jiffies_tmp < jiffies_last) /* We have a wrap */
> + jiffies_hi++;
> + jiffies_last = jiffies_tmp;
> + spin_unlock(&jiffies64_lock);
> + } }
> +
> +static inline void init_jiffieswrap_timer(void)
> +{
> + init_timer(&jiffieswrap_timer);
> + jiffieswrap_timer.expires = jiffies + CHECK_JIFFIESWRAP_INTERVAL;
> + jiffieswrap_timer.function = check_jiffieswrap;
> + add_timer(&jiffieswrap_timer);
> +}
> +
> +#else
> +
> +static inline void init_jiffieswrap_timer(void)
> +{
> +}
> +
> +#endif /* NEEDS_JIFFIES64 */
> +
>
> #if !defined(__alpha__) && !defined(__ia64__)
>
>
> --- linux-2.4.19-pre2/kernel/fork.c Sun Feb 24 19:20:43 2002
> +++ linux-2.4.19-pre2-j64/kernel/fork.c Fri Mar 1 00:05:24 2002
> @@ -657,7 +657,7 @@
> }
> #endif
> p->lock_depth = -1; /* -1 = no lock */
> - p->start_time = jiffies;
> + p->start_time = get_jiffies64();
>
> INIT_LIST_HEAD(&p->local_pages);
>
>
> --- linux-2.4.19-pre2/kernel/info.c Sat Apr 21 01:15:40 2001
> +++ linux-2.4.19-pre2-j64/kernel/info.c Fri Mar 1 00:05:24 2002
> @@ -12,15 +12,19 @@
> #include <linux/smp_lock.h>
>
> #include <asm/uaccess.h>
> +#include <asm/div64.h>
>
> asmlinkage long sys_sysinfo(struct sysinfo *info)
> {
> struct sysinfo val;
> + u64 uptime;
>
> memset((char *)&val, 0, sizeof(struct sysinfo));
>
> cli();
> - val.uptime = jiffies / HZ;
> + uptime = get_jiffies64();
> + do_div(uptime, HZ);
> + val.uptime = (unsigned long) uptime;
>
> val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
> val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
>
> --- linux-2.4.19-pre2/fs/proc/array.c Thu Oct 11 18:00:01 2001
> +++ linux-2.4.19-pre2-j64/fs/proc/array.c Fri Mar 1 00:05:24 2002
> @@ -343,7 +343,7 @@
> ppid = task->pid ? task->p_opptr->pid : 0;
> read_unlock(&tasklist_lock);
> res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
> -%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
> +%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %llu %lu %ld %lu %lu %lu %lu %lu \
> %lu %lu %lu %lu %lu %lu %lu %lu %d %d\n",
> task->pid,
> task->comm,
> @@ -366,7 +366,7 @@
> nice,
> 0UL /* removed */,
> task->it_real_value,
> - task->start_time,
> + (unsigned long long)(task->start_time),
> vsize,
> mm ? mm->rss : 0, /* you might want to shift this left 3 */
> task->rlim[RLIMIT_RSS].rlim_cur,
>
> --- linux-2.4.19-pre2/fs/proc/proc_misc.c Wed Nov 21 06:29:09 2001
> +++ linux-2.4.19-pre2-j64/fs/proc/proc_misc.c Fri Mar 1 01:32:28 2002
> @@ -40,6 +40,7 @@
> #include <asm/uaccess.h>
> #include <asm/pgtable.h>
> #include <asm/io.h>
> +#include <asm/div64.h>
>
>
> #define LOAD_INT(x) ((x) >> FSHIFT)
> @@ -93,37 +94,93 @@
> return proc_calc_metrics(page, start, off, count, eof, len);
> }
>
> +#if BITS_PER_LONG < 48
> +static unsigned long idle_hi, idle_last;
> +static spinlock_t idle64_lock = SPIN_LOCK_UNLOCKED;
> +
> +u64 get_idle64(void)
> +{
> + unsigned long idle, idle_hi_tmp;
> +
> + spin_lock(&idle64_lock);
> + idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
> + if (unlikely(idle < idle_last)) /* We have a wrap */
> + idle_hi++;
> + idle_last = idle;
> + idle_hi_tmp = idle_hi;
> + spin_unlock(&idle64_lock);
> +
> + return (idle | ((u64)idle_hi_tmp) << BITS_PER_LONG);
> +}
> +
> +/* use a timer to periodically check for idle time overflow */
> +
> +static struct timer_list idlewrap_timer;
> +#define CHECK_IDLEWRAP_INTERVAL (1ul << (BITS_PER_LONG-2))
> +
> +static void check_idlewrap(unsigned long data)
> +{
> + unsigned long idle;
> +
> + mod_timer(&idlewrap_timer, jiffies + CHECK_IDLEWRAP_INTERVAL);
> + if (spin_trylock(&idle64_lock)) {
> + /* If we don't get the lock, we can just give up.
> + The current holder of the lock will check for wraps */
> + idle = init_tasks[0]->times.tms_utime
> + + init_tasks[0]->times.tms_stime;
> + if (idle < idle_last) /* We have a wrap */
> + idle_hi++;
> + idle_last = idle;
> + spin_unlock(&idle64_lock);
> + } }
> +
> +static inline void init_idlewrap_timer(void)
> +{
> + init_timer(&idlewrap_timer);
> + idlewrap_timer.expires = jiffies + CHECK_IDLEWRAP_INTERVAL;
> + idlewrap_timer.function = check_idlewrap;
> + add_timer(&idlewrap_timer);
> +}
> +
> +#else
> + /* Idle time won't overflow for 8716 years at HZ==1024 */
> +
> +static inline u64 get_idle64(void)
> +{
> + return (u64)(init_tasks[0]->times.tms_utime
> + + init_tasks[0]->times.tms_stime);
> +}
> +
> +static inline void init_idlewrap_timer(void)
> +{
> +}
> +
> +#endif /* BITS_PER_LONG < 48 */
> +
> static int uptime_read_proc(char *page, char **start, off_t off,
> int count, int *eof, void *data)
> {
> - unsigned long uptime;
> - unsigned long idle;
> + u64 uptime, idle;
> + unsigned long uptime_remainder, idle_remainder;
> int len;
>
> - uptime = jiffies;
> - idle = init_tasks[0]->times.tms_utime + init_tasks[0]->times.tms_stime;
> + uptime = get_jiffies64();
> + uptime_remainder = (unsigned long) do_div(uptime, HZ);
> + idle = get_idle64();
> + idle_remainder = (unsigned long) do_div(idle, HZ);
>
> - /* The formula for the fraction parts really is ((t * 100) / HZ) % 100, but
> - that would overflow about every five days at HZ == 100.
> - Therefore the identity a = (a / b) * b + a % b is used so that it is
> - calculated as (((t / HZ) * 100) + ((t % HZ) * 100) / HZ) % 100.
> - The part in front of the '+' always evaluates as 0 (mod 100). All divisions
> - in the above formulas are truncating. For HZ being a power of 10, the
> - calculations simplify to the version in the #else part (if the printf
> - format is adapted to the same number of digits as zeroes in HZ.
> - */
> #if HZ!=100
> len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
> - uptime / HZ,
> - (((uptime % HZ) * 100) / HZ) % 100,
> - idle / HZ,
> - (((idle % HZ) * 100) / HZ) % 100);
> + (unsigned long) uptime,
> + (uptime_remainder * 100) / HZ,
> + (unsigned long) idle,
> + (idle_remainder * 100) / HZ);
> #else
> len = sprintf(page,"%lu.%02lu %lu.%02lu\n",
> - uptime / HZ,
> - uptime % HZ,
> - idle / HZ,
> - idle % HZ);
> + (unsigned long) uptime,
> + uptime_remainder,
> + (unsigned long) idle,
> + idle_remainder);
> #endif
> return proc_calc_metrics(page, start, off, count, eof, len);
> }
> @@ -240,7 +297,7 @@
> {
> int i, len;
> extern unsigned long total_forks;
> - unsigned long jif = jiffies;
> + u64 jif = get_jiffies64();
> unsigned int sum = 0, user = 0, nice = 0, system = 0;
> int major, disk;
>
> @@ -256,17 +313,19 @@
> #endif
> }
>
> - len = sprintf(page, "cpu %u %u %u %lu\n", user, nice, system,
> - jif * smp_num_cpus - (user + nice + system));
> + len = sprintf(page, "cpu %u %u %u %llu\n", user, nice, system,
> + (unsigned long long) jif * smp_num_cpus
> + - user - nice - system);
> for (i = 0 ; i < smp_num_cpus; i++)
> - len += sprintf(page + len, "cpu%d %u %u %u %lu\n",
> + len += sprintf(page + len, "cpu%d %u %u %u %llu\n",
> i,
> kstat.per_cpu_user[cpu_logical_map(i)],
> kstat.per_cpu_nice[cpu_logical_map(i)],
> kstat.per_cpu_system[cpu_logical_map(i)],
> - jif - ( kstat.per_cpu_user[cpu_logical_map(i)] \
> - + kstat.per_cpu_nice[cpu_logical_map(i)] \
> - + kstat.per_cpu_system[cpu_logical_map(i)]));
> + (unsigned long long) jif
> + - kstat.per_cpu_user[cpu_logical_map(i)]
> + - kstat.per_cpu_nice[cpu_logical_map(i)]
> + - kstat.per_cpu_system[cpu_logical_map(i)]);
> len += sprintf(page + len,
> "page %u %u\n"
> "swap %u %u\n"
> @@ -302,12 +361,13 @@
> }
> }
>
> + do_div(jif, HZ);
> len += sprintf(page + len,
> "\nctxt %u\n"
> "btime %lu\n"
> "processes %lu\n",
> kstat.context_swtch,
> - xtime.tv_sec - jif / HZ,
> + xtime.tv_sec - (unsigned long) jif,
> total_forks);
>
> return proc_calc_metrics(page, start, off, count, eof, len);
> @@ -565,4 +625,6 @@
> slabinfo_read_proc, NULL);
> if (entry)
> entry->write_proc = slabinfo_write_proc;
> +
> + init_idlewrap_timer();
> }
>
> --- linux-2.4.19-pre2/mm/oom_kill.c Sun Nov 4 02:05:25 2001
> +++ linux-2.4.19-pre2-j64/mm/oom_kill.c Fri Mar 1 00:05:24 2002
> @@ -69,11 +69,10 @@
> /*
> * CPU time is in seconds and run time is in minutes. There is no
> * particular reason for this other than that it turned out to work
> - * very well in practice. This is not safe against jiffie wraps
> - * but we don't care _that_ much...
> + * very well in practice.
> */
> cpu_time = (p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3);
> - run_time = (jiffies - p->start_time) >> (SHIFT_HZ + 10);
> + run_time = (get_jiffies64() - p->start_time) >> (SHIFT_HZ + 10);
>
> points /= int_sqrt(cpu_time);
> points /= int_sqrt(int_sqrt(run_time));
>
> --- linux-2.4.19-pre2/kernel/acct.c Thu Feb 28 23:52:26 2002
> +++ linux-2.4.19-pre2-j64/kernel/acct.c Fri Mar 1 00:06:51 2002
> @@ -56,6 +56,7 @@
> #include <linux/tty.h>
>
> #include <asm/uaccess.h>
> +#include <asm/div64.h>
>
> /*
> * These constants control the amount of freespace that suspend and
> @@ -227,20 +228,24 @@
> * This routine has been adopted from the encode_comp_t() function in
> * the kern_acct.c file of the FreeBSD operating system. The encoding
> * is a 13-bit fraction with a 3-bit (base 8) exponent.
> + *
> + * Bumped up to encode 64 bit values. Unfortunately the result may
> + * overflow now.
> */
>
> #define MANTSIZE 13 /* 13 bit mantissa. */
> -#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */
> +#define EXPSIZE 3 /* 3 bit exponent. */
> +#define EXPBASE 3 /* Base 8 (3 bit) exponent. */
> #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */
>
> -static comp_t encode_comp_t(unsigned long value)
> +static comp_t encode_comp_t(u64 value)
> {
> int exp, rnd;
>
> exp = rnd = 0;
> while (value > MAXFRACT) {
> - rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */
> - value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */
> + rnd = value & (1 << (EXPBASE - 1)); /* Round up? */
> + value >>= EXPBASE; /* Base 8 exponent == 3 bit shift. */
> exp++;
> }
>
> @@ -248,16 +253,21 @@
> * If we need to round up, do it (and handle overflow correctly).
> */
> if (rnd && (++value > MAXFRACT)) {
> - value >>= EXPSIZE;
> + value >>= EXPBASE;
> exp++;
> }
>
> /*
> * Clean it up and polish it off.
> */
> - exp <<= MANTSIZE; /* Shift the exponent into place */
> - exp += value; /* and add on the mantissa. */
> - return exp;
> + if (exp >= (1 << EXPSIZE)) {
> + /* Overflow. Return largest representable number instead. */
> + return (1ul << (MANTSIZE + EXPSIZE)) - 1;
> + } else {
> + exp <<= MANTSIZE; /* Shift the exponent into place */
> + exp += value; /* and add on the mantissa. */
> + return exp;
> + }
> }
>
> /*
> @@ -278,6 +288,7 @@
> mm_segment_t fs;
> unsigned long vsize;
> unsigned long flim;
> + u64 elapsed;
>
> /*
> * First check to see if there is enough free_space to continue
> @@ -295,8 +306,10 @@
> strncpy(ac.ac_comm, current->comm, ACCT_COMM);
> ac.ac_comm[ACCT_COMM - 1] = '\0';
>
> - ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
> - ac.ac_etime = encode_comp_t(jiffies - current->start_time);
> + elapsed = get_jiffies64() - current->start_time;
> + ac.ac_etime = encode_comp_t(elapsed);
> + do_div(elapsed, HZ);
> + ac.ac_btime = xtime.tv_sec - elapsed;
> ac.ac_utime = encode_comp_t(current->times.tms_utime);
> ac.ac_stime = encode_comp_t(current->times.tms_stime);
> ac.ac_uid = current->uid;
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
George [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Real time sched: http://sourceforge.net/projects/rtsched/

2002-03-01 17:58:25

by Andreas Dilger

[permalink] [raw]
Subject: Re: [patch] enable uptime display > 497 days on 32 bit (1/2)

On Mar 01, 2002 03:55 +0100, Tim Schmielau wrote:
> rediffed to 2.4.19-pre2 and three micro-optimizations:
>
> move jiffies_hi etc. to same cacheline as jiffies
> (suggested by George Anzinger)
> avoid turning off interrupts (suggested by Andreas Dilger)
> use unlikely() (suggested by Andreas Dilger)
>
> As no other comments turned up, this will go to Marcelo RSN.
> (wondered why noone vetoed this as overkill...)

Minor nit - the indenting of #ifdefs is not really used in the kernel.

> +u64 get_jiffies64(void)
> +{
> + unsigned long jiffies_tmp, jiffies_hi_tmp;
> +
> + spin_lock(&jiffies64_lock);
> + jiffies_tmp = jiffies; /* avoid races */
> + jiffies_hi_tmp = jiffies_hi;
> + if (unlikely(jiffies_tmp < jiffies_last)) /* We have a wrap */
> + jiffies_hi++;
> + jiffies_last = jiffies_tmp;
> + spin_unlock(&jiffies64_lock);
> +
> + return (jiffies_tmp | ((u64)jiffies_hi_tmp) << BITS_PER_LONG);
> +}

If jiffies_hi is incremented, then jiffies_hi_tmp will be wrong on return.

> +static void check_jiffieswrap(unsigned long data)
> +{
> + unsigned long jiffies_tmp;
> + mod_timer(&jiffieswrap_timer, jiffies + CHECK_JIFFIESWRAP_INTERVAL);
> +
> + if (spin_trylock(&jiffies64_lock)) {
> + /* If we don't get the lock, we can just give up.
> + The current holder of the lock will check for wraps */
> + jiffies_tmp = jiffies; /* avoid races */
> + if (jiffies_tmp < jiffies_last) /* We have a wrap */
> + jiffies_hi++;
> + jiffies_last = jiffies_tmp;
> + spin_unlock(&jiffies64_lock);
> + } }
note:----------------------------------------------------------------------^

Since check_jiffieswrap() and get_jiffies64() are substantially the same,
you may want to define a function _inc_jiffies64() which does:

+#ifdef NEEDS_JIFFIES64
+/* jiffies_hi and jiffies_last are protected by jiffies64_lock */
+static unsigned long jiffies_hi, jiffies_last;
+static spinlock_t jiffies64_lock = SPIN_LOCK_UNLOCKED;
+#endif

static inline void _inc_jiffies64(unsigned long jiffies_tmp)
{
jiffies_tmp = jiffies; /* avoid races */
if (jiffies_tmp < jiffies_last) /* We have a wrap */
jiffies_hi++;
jiffies_last = jiffies_tmp;
}

static void get_jiffies64()
{
unsigned long jiffies_tmp, jiffies_hi_tmp;

spin_lock(&jiffies64_lock);
_inc_jiffies64(jiffies_tmp);
jiffies_hi_tmp = jiffies_hi;
spin_unlock(&jiffies64_lock);

return (jiffies_tmp | ((u64)jiffies_hi_tmp) << BITS_PER_LONG);
}

static void check_jiffieswrap(unsigned long data)
{
unsigned long jiffies_tmp;
mod_timer(&jiffieswrap_timer, jiffies + CHECK_JIFFIESWRAP_INTERVAL);

/*
* If we don't get the lock, we can just give up.
* The current holder of the lock will check for wraps
*/
if (spin_trylock(&jiffies64_lock)) {
_inc_jiffies64(jiffies_tmp);
spin_unlock(&jiffies64_lock);
}
}

Cheers, Andreas
--
Andreas Dilger
http://sourceforge.net/projects/ext2resize/
http://www-mddsp.enel.ucalgary.ca/People/adilger/

2002-03-01 18:22:02

by Tim Schmielau

[permalink] [raw]
Subject: Re: [patch] enable uptime display > 497 days on 32 bit (1/2)

On Fri, 1 Mar 2002, Andreas Dilger wrote:

> On Mar 01, 2002 03:55 +0100, Tim Schmielau wrote:
[...]
> > As no other comments turned up, this will go to Marcelo RSN.
> > (wondered why noone vetoed this as overkill...)
>
> Minor nit - the indenting of #ifdefs is not really used in the kernel.
>
> > +u64 get_jiffies64(void)
> > +{
> > + unsigned long jiffies_tmp, jiffies_hi_tmp;
> > +
> > + spin_lock(&jiffies64_lock);
> > + jiffies_tmp = jiffies; /* avoid races */
> > + jiffies_hi_tmp = jiffies_hi;
> > + if (unlikely(jiffies_tmp < jiffies_last)) /* We have a wrap */
> > + jiffies_hi++;
> > + jiffies_last = jiffies_tmp;
> > + spin_unlock(&jiffies64_lock);
> > +
> > + return (jiffies_tmp | ((u64)jiffies_hi_tmp) << BITS_PER_LONG);
> > +}
>
> If jiffies_hi is incremented, then jiffies_hi_tmp will be wrong on return.

Thanks!
I thought I had corrected this but somehow must have posted a previous
version. I will probably soon be known as a sloppy coder :-(

> note:----------------------------------------------------------------------^
>
> Since check_jiffieswrap() and get_jiffies64() are substantially the same,
> you may want to define a function _inc_jiffies64() which does:
>
> +#ifdef NEEDS_JIFFIES64
> +/* jiffies_hi and jiffies_last are protected by jiffies64_lock */
> +static unsigned long jiffies_hi, jiffies_last;
> +static spinlock_t jiffies64_lock = SPIN_LOCK_UNLOCKED;
> +#endif
>
> static inline void _inc_jiffies64(unsigned long jiffies_tmp)
> {
> jiffies_tmp = jiffies; /* avoid races */
> if (jiffies_tmp < jiffies_last) /* We have a wrap */
> jiffies_hi++;
> jiffies_last = jiffies_tmp;
> }

Shouldn't this be

static inline void _inc_jiffies64(unsigned long *jiffies_tmp)
{
*jiffies_tmp = jiffies; /* avoid races */
if (*jiffies_tmp < jiffies_last) /* We have a wrap */
jiffies_hi++;
jiffies_last = *jiffies_tmp;
}

?
So that we'd then need

static u64 get_jiffies64(void)
{
unsigned long jiffies_tmp, jiffies_hi_tmp;

spin_lock(&jiffies64_lock);
_inc_jiffies64(&jiffies_tmp);
jiffies_hi_tmp = jiffies_hi;
spin_unlock(&jiffies64_lock);

return (jiffies_tmp | ((u64)jiffies_hi_tmp) << BITS_PER_LONG);
}
...

And I'm still thinking of a better name than _inc_jiffies64(), since
most of the time we don't increment anything.

Tim