Attempt to reduce stack usage in acct.c (linux-2.6.12-rc1-mm3). Stack
usage was noted using checkstack.pl. Specifically:
Before patch
------------
check_free_space - 128
do_acct_process - 105
After patch
-----------
check_free_space - 36
do_acct_process - 44
Signed-off-by: Yum Rayan <[email protected]>
--- a/kernel/acct.c 2005-03-25 22:11:06.000000000 -0800
+++ b/kernel/acct.c 2005-03-30 15:33:05.000000000 -0800
@@ -103,30 +103,32 @@
*/
static int check_free_space(struct file *file)
{
- struct kstatfs sbuf;
- int res;
- int act;
- sector_t resume;
- sector_t suspend;
+ struct kstatfs *sbuf = NULL;
+ int res, act;
+ sector_t resume, suspend;
spin_lock(&acct_globals.lock);
res = acct_globals.active;
if (!file || !acct_globals.needcheck)
- goto out;
+ goto out_unlock;
spin_unlock(&acct_globals.lock);
+ sbuf = kmalloc(sizeof (struct kstatfs), GFP_KERNEL);
+ if (!sbuf)
+ goto out_res;
+
/* May block */
- if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
- return res;
- suspend = sbuf.f_blocks * SUSPEND;
- resume = sbuf.f_blocks * RESUME;
+ if (vfs_statfs(file->f_dentry->d_inode->i_sb, sbuf))
+ goto out_free_sbuf;
+ suspend = sbuf->f_blocks * SUSPEND;
+ resume = sbuf->f_blocks * RESUME;
sector_div(suspend, 100);
sector_div(resume, 100);
- if (sbuf.f_bavail <= suspend)
+ if (sbuf->f_bavail <= suspend)
act = -1;
- else if (sbuf.f_bavail >= resume)
+ else if (sbuf->f_bavail >= resume)
act = 1;
else
act = 0;
@@ -139,7 +141,7 @@
if (file != acct_globals.file) {
if (act)
res = act>0;
- goto out;
+ goto out_unlock;
}
if (acct_globals.active) {
@@ -159,8 +161,11 @@
acct_globals.timer.expires = jiffies + ACCT_TIMEOUT*HZ;
add_timer(&acct_globals.timer);
res = acct_globals.active;
-out:
+out_unlock:
spin_unlock(&acct_globals.lock);
+out_free_sbuf:
+ kfree(sbuf);
+out_res:
return res;
}
@@ -380,7 +385,7 @@
*/
static void do_acct_process(long exitcode, struct file *file)
{
- acct_t ac;
+ acct_t *ac;
mm_segment_t fs;
unsigned long vsize;
unsigned long flim;
@@ -395,14 +400,18 @@
if (!check_free_space(file))
return;
+ ac = kmalloc(sizeof (acct_t), GFP_KERNEL);
+ if (!ac)
+ return;
+
/*
* Fill the accounting struct with the needed info as recorded
* by the different kernel functions.
*/
- memset((caddr_t)&ac, 0, sizeof(acct_t));
+ memset(ac, 0, sizeof(acct_t));
- ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER;
- strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm));
+ ac->ac_version = ACCT_VERSION | ACCT_BYTEORDER;
+ strlcpy(ac->ac_comm, current->comm, sizeof(ac->ac_comm));
/* calculate run_time in nsec*/
do_posix_clock_monotonic_gettime(&uptime);
@@ -412,57 +421,57 @@
/* convert nsec -> AHZ */
elapsed = nsec_to_AHZ(run_time);
#if ACCT_VERSION==3
- ac.ac_etime = encode_float(elapsed);
+ ac->ac_etime = encode_float(elapsed);
#else
- ac.ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
+ ac->ac_etime = encode_comp_t(elapsed < (unsigned long) -1l ?
(unsigned long) elapsed : (unsigned long) -1l);
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
{
/* new enlarged etime field */
comp2_t etime = encode_comp2_t(elapsed);
- ac.ac_etime_hi = etime >> 16;
- ac.ac_etime_lo = (u16) etime;
+ ac->ac_etime_hi = etime >> 16;
+ ac->ac_etime_lo = (u16) etime;
}
#endif
do_div(elapsed, AHZ);
- ac.ac_btime = xtime.tv_sec - elapsed;
- ac.ac_utime = encode_comp_t(jiffies_to_AHZ(
+ ac->ac_btime = xtime.tv_sec - elapsed;
+ ac->ac_utime = encode_comp_t(jiffies_to_AHZ(
current->signal->utime +
current->group_leader->utime));
- ac.ac_stime = encode_comp_t(jiffies_to_AHZ(
+ ac->ac_stime = encode_comp_t(jiffies_to_AHZ(
current->signal->stime +
current->group_leader->stime));
/* we really need to bite the bullet and change layout */
- ac.ac_uid = current->uid;
- ac.ac_gid = current->gid;
+ ac->ac_uid = current->uid;
+ ac->ac_gid = current->gid;
#if ACCT_VERSION==2
- ac.ac_ahz = AHZ;
+ ac->ac_ahz = AHZ;
#endif
#if ACCT_VERSION==1 || ACCT_VERSION==2
/* backward-compatible 16 bit fields */
- ac.ac_uid16 = current->uid;
- ac.ac_gid16 = current->gid;
+ ac->ac_uid16 = current->uid;
+ ac->ac_gid16 = current->gid;
#endif
#if ACCT_VERSION==3
- ac.ac_pid = current->tgid;
- ac.ac_ppid = current->parent->tgid;
+ ac->ac_pid = current->tgid;
+ ac->ac_ppid = current->parent->tgid;
#endif
read_lock(&tasklist_lock); /* pin current->signal */
- ac.ac_tty = current->signal->tty ?
+ ac->ac_tty = current->signal->tty ?
old_encode_dev(tty_devnum(current->signal->tty)) : 0;
read_unlock(&tasklist_lock);
- ac.ac_flag = 0;
+ ac->ac_flag = 0;
if (current->flags & PF_FORKNOEXEC)
- ac.ac_flag |= AFORK;
+ ac->ac_flag |= AFORK;
if (current->flags & PF_SUPERPRIV)
- ac.ac_flag |= ASU;
+ ac->ac_flag |= ASU;
if (current->flags & PF_DUMPCORE)
- ac.ac_flag |= ACORE;
+ ac->ac_flag |= ACORE;
if (current->flags & PF_SIGNALED)
- ac.ac_flag |= AXSIG;
+ ac->ac_flag |= AXSIG;
vsize = 0;
if (current->mm) {
@@ -476,15 +485,15 @@
up_read(¤t->mm->mmap_sem);
}
vsize = vsize / 1024;
- ac.ac_mem = encode_comp_t(vsize);
- ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
- ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
- ac.ac_minflt = encode_comp_t(current->signal->min_flt +
+ ac->ac_mem = encode_comp_t(vsize);
+ ac->ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */
+ ac->ac_rw = encode_comp_t(ac->ac_io / 1024);
+ ac->ac_minflt = encode_comp_t(current->signal->min_flt +
current->group_leader->min_flt);
- ac.ac_majflt = encode_comp_t(current->signal->maj_flt +
+ ac->ac_majflt = encode_comp_t(current->signal->maj_flt +
current->group_leader->maj_flt);
- ac.ac_swaps = encode_comp_t(0);
- ac.ac_exitcode = exitcode;
+ ac->ac_swaps = encode_comp_t(0);
+ ac->ac_exitcode = exitcode;
/*
* Kernel segment override to datasegment and write it
@@ -497,10 +506,11 @@
*/
flim = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
- file->f_op->write(file, (char *)&ac,
+ file->f_op->write(file, (char *)ac,
sizeof(acct_t), &file->f_pos);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
set_fs(fs);
+ kfree(ac);
}
/*
On Wed, 30 March 2005 23:39:40 -0800, Yum Rayan wrote:
>
> Before patch
> ------------
> check_free_space - 128
> do_acct_process - 105
>
> After patch
> -----------
> check_free_space - 36
> do_acct_process - 44
It is always nice to see enthusiasm, but in your case it might be a
bit misguided. None of the functions you worked on appear to be real
problems wrt. stack usage.
But if you have time to tackle some of these functions, that may make
a real difference:
http://wh.fh-wedel.de/~joern/stackcheck.2.6.11
In principle, all recursive paths should consume as little stack as
possible. Or the recursion itself could be avoided, even better. And
some of the call chains with ~3k of stack consumption may be
problematic on other platforms, like the x86-64. Taking care of those
could result in smaller stacks for the respective platform.
Jörn
--
When I am working on a problem I never think about beauty. I think
only how to solve the problem. But when I have finished, if the
solution is not beautiful, I know it is wrong.
-- R. Buckminster Fuller
Jörn Engel wrote:
> On Wed, 30 March 2005 23:39:40 -0800, Yum Rayan wrote:
>
>>Before patch
>>------------
>>check_free_space - 128
>>do_acct_process - 105
>>
>>After patch
>>-----------
>>check_free_space - 36
>>do_acct_process - 44
>
>
> It is always nice to see enthusiasm, but in your case it might be a
> bit misguided. None of the functions you worked on appear to be real
> problems wrt. stack usage.
Yes, this is similar to what I was about to write.
It would be more useful to tackle the really large stack consumers
or ones in deep call chains.
> But if you have time to tackle some of these functions, that may make
> a real difference:
>
> http://wh.fh-wedel.de/~joern/stackcheck.2.6.11
>
> In principle, all recursive paths should consume as little stack as
> possible. Or the recursion itself could be avoided, even better. And
> some of the call chains with ~3k of stack consumption may be
> problematic on other platforms, like the x86-64. Taking care of those
> could result in smaller stacks for the respective platform.
Here is 2.6.12-rc1-bk3 raw checkstack output on x86-64:
http://developer.osdl.org/~rddunlap/doc/checkstack1.out
--
~Randy
On Thu, Mar 31, 2005 at 05:05:48PM +0200, Jörn Engel wrote:
> On Wed, 30 March 2005 23:39:40 -0800, Yum Rayan wrote:
> >
> > Before patch
> > ------------
> > check_free_space - 128
> > do_acct_process - 105
> >
> > After patch
> > -----------
> > check_free_space - 36
> > do_acct_process - 44
>
> It is always nice to see enthusiasm, but in your case it might be a
> bit misguided. None of the functions you worked on appear to be real
> problems wrt. stack usage.
>
> But if you have time to tackle some of these functions, that may make
> a real difference:
>
> http://wh.fh-wedel.de/~joern/stackcheck.2.6.11
>
> In principle, all recursive paths should consume as little stack as
> possible. Or the recursion itself could be avoided, even better. And
Sometimes it's easy to prove that the recursion can't occur more than
once.
Especially with a moderate stack usage, such cases are not a problem.
But auditing the recursive paths for problematic ones is still an open
task.
> some of the call chains with ~3k of stack consumption may be
> problematic on other platforms, like the x86-64. Taking care of those
> could result in smaller stacks for the respective platform.
There's also something different that can be done:
On i386, unit-at-a-time is disabled (the only currently released version
of GNU gcc with unit-at-a-time is gcc 3.4 [1]) since gcc's stack
handling isn't very good.
With unit-at-a-time, the highest stack usage within a single function is
over 3kB.
While this is technically gcc's fault, workarounds would IMHO be worth it,
since unit-at-a-time gives me kernel images that are smaller by 2% [2],
and I would be surprised if the speed effect wasn't positive [3].
The task I'm suggesting is therefore:
- remove the -fno-unit-at-a-time in arch/i386/Makefile in your private
kernel sources
- use gcc 3.4
- reduce the stack usages in call paths > 3kB
Note that with unit-at-a-time, gcc inlines several static functions, so
the stack usage you see for a function might be accumulated from several
functions.
It's IMHO best to do this against -mm.
I do not currently have the time for doing this, but it would be something
with a real advantage for many users.
> Jörn
cu
Adrian
[1] SuSE "gcc 3.3" also supports this
[2] with -O2
[3] I do not claim it has to be measurable positive, but at least not
negative
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
On Thu, Mar 31, 2005 at 12:09:54PM -0800, Randy.Dunlap wrote:
> Jörn Engel wrote:
>...
> >In principle, all recursive paths should consume as little stack as
> >possible. Or the recursion itself could be avoided, even better. And
> >some of the call chains with ~3k of stack consumption may be
> >problematic on other platforms, like the x86-64. Taking care of those
> >could result in smaller stacks for the respective platform.
>
> Here is 2.6.12-rc1-bk3 raw checkstack output on x86-64:
> http://developer.osdl.org/~rddunlap/doc/checkstack1.out
Looking at the stack usage numbers, this was with a gcc that supports
unit-at-a-time.
If you use gcc 3.4 and enable unit-at-a-time (see my other email) the
i386 numbers look quite similar.
I doubt there are many architecture or 64 bit [1] specific stack usage
problems, so working on i386 might be enough.
> ~Randy
cu
Adrian
[1] theoretically a factor of two would be possible due to the different
pointer sizes - but I have yet to see any example where this
really matters
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
> The task I'm suggesting was therefore:
> - remove the -fno-unit-at-a-time in arch/i386/Makefile in your private
> kernel sources
> - use gcc 3.4
> - reduce the stack usages in call paths > 3kB
This is a good idea. However, I might suggest using gcc 4.0 (you'll
have to use a snapshot now, but the release should only be a few weeks
away). A patch went into gcc 4.0 that makes gcc more intelligent
about sharing stack for variables that cannot be alive at the same
time, and therefore it may be more feasible to make unit-at-a-time
work for the i386 kernels.
- R.
On Thu, Mar 31, 2005 at 12:43:38PM -0800, Roland Dreier wrote:
> > The task I'm suggesting was therefore:
> > - remove the -fno-unit-at-a-time in arch/i386/Makefile in your private
> > kernel sources
> > - use gcc 3.4
> > - reduce the stack usages in call paths > 3kB
>
> This is a good idea. However, I might suggest using gcc 4.0 (you'll
> have to use a snapshot now, but the release should only be a few weeks
> away). A patch went into gcc 4.0 that makes gcc more intelligent
> about sharing stack for variables that cannot be alive at the same
> time, and therefore it may be more feasible to make unit-at-a-time
> work for the i386 kernels.
That's one option.
Jörn, can you send a list of call paths with a stack usage > 3kB when
compiling with gcc 3.4 and unit-at-a-time (or tell me how to generate
these lists)?
If fixing a handful of places were sufficient, it would IMHO be worth it for
enabling unit-at-a-time with gcc 3.4 .
> - R.
cu
Adrian
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
On Thu, 31 March 2005 23:19:41 +0200, Adrian Bunk wrote:
>
> Jörn, can you send a list of call paths with a stack usage > 3kB when
> compiling with gcc 3.4 and unit-at-a-time (or tell me how to generate
> these lists)?
I'll do a spin over the weekend.
If you want to generate the lists, you can either join IBM, deal with
their lawyers or rewrite the checker [1].
> If fixing a handful of places was sufficient, it was IMHO worth it for
> enabling unit-at-a-time with gcc 3.4 .
Absolutely. Even if it required more work, it might be worth it,
eventually.
[1] http://wh.fh-wedel.de/~joern/quality.pdf
Jörn
--
I don't understand it. Nobody does.
-- Richard P. Feynman
On Fri, 1 April 2005 12:17:23 +0200, Jörn Engel wrote:
> On Thu, 31 March 2005 23:19:41 +0200, Adrian Bunk wrote:
> >
> > Jörn, can you send a list of call paths with a stack usage > 3kB when
> > compiling with gcc 3.4 and unit-at-a-time (or tell me how to generate
> > these lists)?
>
> I'll do a spin over the weekend.
Argl! No, most likely I won't. My checker currently depends on
gcc 3.1 (heavily patched). Porting this to 3.4 or 4.0 is something
I'd like to avoid. Sparse would be a better target to port to.
In any case, it's nothing to do over the weekend.
Jörn
--
And spam is a useful source of entropy for /dev/random too!
-- Jasmine Strong
Yum Rayan <[email protected]> writes:
> Attempt to reduce stack usage in acct.c (linux-2.6.12-rc1-mm3). Stack
> usage was noted using checkstack.pl. Specifically:
>
> Before patch
> ------------
> check_free_space - 128
>
> After patch
> -----------
> check_free_space - 36
>
> Signed-off-by: Yum Rayan <[email protected]>
>
> --- a/kernel/acct.c 2005-03-25 22:11:06.000000000 -0800
> +++ b/kernel/acct.c 2005-03-30 15:33:05.000000000 -0800
> @@ -103,30 +103,32 @@
> */
> static int check_free_space(struct file *file)
> {
> - struct kstatfs sbuf;
> - int res;
> - int act;
> - sector_t resume;
> - sector_t suspend;
> + struct kstatfs *sbuf = NULL;
> + int res, act;
> + sector_t resume, suspend;
I can't see how you expect to save 128-36=92 bytes here. You replace
sizeof(struct kstatfs)=64 with sizeof(struct kstatfs*)=4, which would
be a saving of 60 bytes. But the call to kmalloc()/kfree() reduces
your stack saving further, not to mention the runtime penalty,
introduced by allocating dynamic memory.
Regards, Olaf.