2002-06-10 03:58:50

by Dawson Engler

[permalink] [raw]
Subject: [CHECKER] 37 stack variables >= 1K in 2.4.17

Here are 37 errors where variables >= 1024 bytes are allocated on a function's
stack.

Dawson

# BUGs | File Name
4 | /drivers/cdrom.c
4 | /message/i2o_proc.c
3 | /net/airo.c
3 | /../inflate.c
2 | /fs/zlib.c
2 | /drivers/zlib.c
2 | /drivers/cpqfcTScontrol.c
2 | /fs/compr_rtime.c
1 | /char/sidewinder.c
1 | /drivers/ide-cs.c
1 | /drivers/iphase.c
1 | /drivers/ixj_pcmcia.c
1 | /i386/nmi.c
1 | /drivers/parport_cs.c
1 | /net/br_ioctl.c
1 | /drivers/qlogicfc.c
1 | /drivers/cpqarray.c
1 | /drivers/optcd.c
1 | /net/wanmain.c
1 | /i386/setup.c
1 | /fs/nfsroot.c
1 | /net/cycx_x25.c
1 | /fs/ioctl.c


############################################################
# 2.4.17 specific errors

#
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/scsi/qlogicfc.c:840:isp2x00_make_portdb: ERROR:VAR:840:840: suspicious var 'temp' = 3072 bytes:840 [nbytes=3072]
static int isp2x00_make_portdb(struct Scsi_Host *host)
{

short param[8];
int i, j;

Error --->
struct id_name_map temp[QLOGICFC_MAX_ID + 1];
struct isp2x00_hostdata *hostdata;

isp2x00_disable_irqs(host);
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/message/i2o/i2o_proc.c:838:i2o_proc_read_ddm_table: ERROR:VAR:838:838: suspicious var 'result' = 2828 bytes:838 [nbytes=2828]
u8 block_status;
u8 error_info_size;
u16 row_count;
u16 more_flag;
i2o_exec_execute_ddm_table ddm_table[MAX_I2O_MODULES];

Error --->
} result;

i2o_exec_execute_ddm_table ddm_table;

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/optcd.c:1625:cdromread: ERROR:VAR:1625:1625: suspicious var 'buf' = 2646 bytes:1625 [nbytes=2646]

static int cdromread(unsigned long arg, int blocksize, int cmd)
{
int status;
struct cdrom_msf msf;

Error --->
char buf[CD_FRAMESIZE_RAWER];

status = verify_area(VERIFY_WRITE, (void *) arg, blocksize);
if (status)
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/message/i2o/i2o_proc.c:2266:i2o_proc_read_lan_mcast_addr: ERROR:VAR:2266:2266: suspicious var 'result' = 2060 bytes:2266 [nbytes=2060]
u8 block_status;
u8 error_info_size;
u16 row_count;
u16 more_flag;
u8 mc_addr[256][8];

Error --->
} result;

spin_lock(&i2o_proc_lock);
len = 0;
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/message/i2o/i2o_proc.c:2497:i2o_proc_read_lan_alt_addr: ERROR:VAR:2497:2497: suspicious var 'result' = 2060 bytes:2497 [nbytes=2060]
u8 block_status;
u8 error_info_size;
u16 row_count;
u16 more_flag;
u8 alt_addr[256][8];

Error --->
} result;

spin_lock(&i2o_proc_lock);
len = 0;
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/message/i2o/i2o_proc.c:1049:i2o_proc_read_groups: ERROR:VAR:1049:1049: suspicious var 'result' = 2060 bytes:1049 [nbytes=2060]
u8 block_status;
u8 error_info_size;
u16 row_count;
u16 more_flag;
i2o_group_info group[256];

Error --->
} result;

spin_lock(&i2o_proc_lock);

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/scsi/cpqfcTScontrol.c:721:CpqTsProcessIMQEntry: ERROR:VAR:721:721: suspicious var 'ulFibreFrame' = 2048 bytes:721 [nbytes=2048]
int iStatus;
USHORT i, RPCset, DPCset;
ULONG x_ID;
ULONG ulBuff, dwStatus;
TachFCHDR_GCMND* fchs;

Error --->
ULONG ulFibreFrame[2048/4]; // max number of DWORDS in incoming Fibre Frame
UCHAR ucInboundMessageType; // Inbound CM, dword 3 "type" field

ENTER("ProcessIMQEntry");
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/atm/iphase.c:2772:ia_ioctl: ERROR:VAR:2772:2772: suspicious var 'regs_local' = 2048 bytes:2772 [nbytes=2048]
ia_cmds.status = 0;
ia_cmds.len = 0x80;
break;
case MEMDUMP_FFL:
{

Error --->
ia_regs_t regs_local;
ffredn_t *ffL = &regs_local.ffredn;
rfredn_t *rfL = &regs_local.rfredn;

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/wireless/airo.c:4316:readrids: ERROR:VAR:4316:4316: suspicious var 'iobuf' = 2048 bytes:4316 [nbytes=2048]
* as needed. This represents the READ side of control I/O to
* the card
*/
static int readrids(struct net_device *dev, aironet_ioctl *comp) {
unsigned short ridcode;

Error --->
unsigned char iobuf[2048];

switch(comp->command)
{
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/wireless/airo.c:4364:writerids: ERROR:VAR:4364:4364: suspicious var 'iobuf' = 2048 bytes:4364 [nbytes=2048]

static int writerids(struct net_device *dev, aironet_ioctl *comp) {
int ridcode;
Resp rsp;
static int (* writer)(struct airo_info *, u16 rid, const void *, int);

Error --->
unsigned char iobuf[2048];

/* Only super-user can write RIDs */
if (!capable(CAP_NET_ADMIN))
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/scsi/cpqfcTScontrol.c:610:PeekIMQEntry: ERROR:VAR:610:610: suspicious var 'ulFibreFrame' = 2048 bytes:610 [nbytes=2048]
// If we find it, check the incoming frame payload (1st word)
// for LILP frame
if( (fcChip->IMQ->QEntry[CI].type & 0x1FF) == 0x104 )
{
TachFCHDR_GCMND* fchs;

Error --->
ULONG ulFibreFrame[2048/4]; // max DWORDS in incoming FC Frame
USHORT SFQpi = (USHORT)(fcChip->IMQ->QEntry[CI].word[0] & 0x0fffL);

CpqTsGetSFQEntry( fcChip,
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/arch/i386/kernel/setup.c:519:sanitize_e820_map: ERROR:VAR:519:519: function stack consumes 1840 bytes:519 [nbytes=1840]
______________________4_
*/

/* if there's only one memory region, don't bother */
if (*pnr_map < 2)

Error --->
return -1;

old_nr = *pnr_map;

---------------------------------------------------------
[BUG] *think* so
/u2/engler/mc/oses/linux/2.4.17/fs/umsdos/ioctl.c:96:UMSDOS_ioctl_dir: ERROR:VAR:96:96: function stack consumes 1772 bytes:96 [nbytes=1772]
&& cmd != UMSDOS_UNLINK_EMD
&& cmd != UMSDOS_UNLINK_DOS
&& cmd != UMSDOS_RMDIR_DOS
&& cmd != UMSDOS_STAT_DOS
&& cmd != UMSDOS_DOS_SETUP)

Error --->
return fat_dir_ioctl (dir, filp, cmd, data_ptr);

/* #Specification: ioctl / access
* Only root (effective id) is allowed to do IOCTL on directory
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/wireless/airo.c:3319:airo_ioctl: ERROR:VAR:3319:3319: function stack consumes 1676 bytes:3319 [nbytes=1676]
#endif /* CISCO_EXT */
{
/* If the command read some stuff, we better get it out of
* the card first... */
if(IW_IS_GET(cmd))

Error --->
readStatusRid(local, &status_rid);
if(IW_IS_GET(cmd) || (cmd == SIOCSIWRATE) || (cmd == SIOCSIWENCODE))
readCapabilityRid(local, &cap_rid);
/* Get config in all cases, because SET will just modify it */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/block/cpqarray.c:1188:ida_ioctl: ERROR:VAR:1188:1188: suspicious var 'my_io' = 1296 bytes:1188 [nbytes=1296]
int dsk = MINOR(inode->i_rdev) >> NWD_SHIFT;
int error;
int diskinfo[4];
struct hd_geometry *geo = (struct hd_geometry *)arg;
ida_ioctl_t *io = (ida_ioctl_t*)arg;

Error --->
ida_ioctl_t my_io;

switch(cmd) {
case HDIO_GETGEO:
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/net/wanrouter/wanmain.c:754:device_new_if: ERROR:VAR:754:754: suspicious var 'conf' = 1272 bytes:754 [nbytes=1272]
* o register network interface
*/

static int device_new_if (wan_device_t *wandev, wanif_conf_t *u_conf)
{

Error --->
wanif_conf_t conf;
netdevice_t *dev=NULL;
#ifdef CONFIG_WANPIPE_MULTPPP
struct ppp_device *pppdev=NULL;
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/block/../../lib/inflate.c:750:inflate_dynamic: ERROR:VAR:750:750: suspicious var 'll' = 1264 bytes:750 [nbytes=1264]
unsigned nl; /* number of literal/length codes */
unsigned nd; /* number of distance codes */
#ifdef PKZIP_BUG_WORKAROUND
unsigned ll[288+32]; /* literal/length and distance code lengths */
#else

Error --->
unsigned ll[286+30]; /* literal/length and distance code lengths */
#endif
register ulg b; /* bit buffer */
register unsigned k; /* number of bits in bit buffer */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/zlib.c:4216:huft_build: ERROR:VAR:4216:4216: suspicious var 'v' = 1152 bytes:4216 [nbytes=1152]
int l; /* bits per table (returned in m) */
register uIntf *p; /* pointer into c[], b[], or v[] */
inflate_huft *q; /* points to current table */
struct inflate_huft_s r; /* table entry for structure assignment */
inflate_huft *u[BMAX]; /* table stack */

Error --->
uInt v[N_MAX]; /* values in order of bit length */
register int w; /* bits before this table == (l * h) */
uInt x[BMAX+1]; /* bit offsets, then code stack */
uIntf *xp; /* pointer into x */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/jffs2/zlib.c:4501:inflate_trees_fixed: ERROR:VAR:4501:4501: suspicious var 'c' = 1152 bytes:4501 [nbytes=1152]
{
/* build fixed tables if not already (multiple overlapped executions ok) */
if (!fixed_built)
{
int k; /* temporary variable */

Error --->
unsigned c[288]; /* length list for huft_build */
z_stream z; /* for falloc function */
int f = FIXEDH; /* number of hufts left in fixed_mem */

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/jffs2/zlib.c:4216:huft_build: ERROR:VAR:4216:4216: suspicious var 'v' = 1152 bytes:4216 [nbytes=1152]
int l; /* bits per table (returned in m) */
register uIntf *p; /* pointer into c[], b[], or v[] */
inflate_huft *q; /* points to current table */
struct inflate_huft_s r; /* table entry for structure assignment */
inflate_huft *u[BMAX]; /* table stack */

Error --->
uInt v[N_MAX]; /* values in order of bit length */
register int w; /* bits before this table == (l * h) */
uInt x[BMAX+1]; /* bit offsets, then code stack */
uIntf *xp; /* pointer into x */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/block/../../lib/inflate.c:688:inflate_fixed: ERROR:VAR:688:688: suspicious var 'l' = 1152 bytes:688 [nbytes=1152]
int i; /* temporary variable */
struct huft *tl; /* literal/length code table */
struct huft *td; /* distance code table */
int bl; /* lookup bits for tl */
int bd; /* lookup bits for td */

Error --->
unsigned l[288]; /* length list for huft_build */

DEBG("<fix");

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/block/../../lib/inflate.c:301:huft_build: ERROR:VAR:301:301: suspicious var 'v' = 1152 bytes:301 [nbytes=1152]
int l; /* bits per table (returned in m) */
register unsigned *p; /* pointer into c[], b[], or v[] */
register struct huft *q; /* points to current table */
struct huft r; /* table entry for structure assignment */
struct huft *u[BMAX]; /* table stack */

Error --->
unsigned v[N_MAX]; /* values in order of bit length */
register int w; /* bits before this table == (l * h) */
unsigned x[BMAX+1]; /* bit offsets, then code stack */
unsigned *xp; /* pointer into x */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/zlib.c:4501:inflate_trees_fixed: ERROR:VAR:4501:4501: suspicious var 'c' = 1152 bytes:4501 [nbytes=1152]
{
/* build fixed tables if not already (multiple overlapped executions ok) */
if (!fixed_built)
{
int k; /* temporary variable */

Error --->
unsigned c[288]; /* length list for huft_build */
z_stream z; /* for falloc function */
int f = FIXEDH; /* number of hufts left in fixed_mem */

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/ide/ide-cs.c:247:ide_config: ERROR:VAR:367:367: function stack consumes 1148 bytes:367 [nbytes=1148]
link->conf.Vpp1/10, link->conf.Vpp1%10);

link->state &= ~DEV_CONFIG_PENDING;
return;


Error --->
cs_failed:
cs_error(link->handle, last_fn, last_ret);
failed:
ide_release((u_long)link);
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/parport/parport_cs.c:252:parport_config: ERROR:VAR:327:327: function stack consumes 1136 bytes:327 [nbytes=1136]
printk("]\n");

link->state &= ~DEV_CONFIG_PENDING;
return;


Error --->
cs_failed:
cs_error(link->handle, last_fn, last_ret);
failed:
parport_cs_release((u_long)link);
---------------------------------------------------------
[BUG] this function just keeps showing up...
/u2/engler/mc/oses/linux/2.4.17/drivers/telephony/ixj_pcmcia.c:221:ixj_config: ERROR:VAR:266:266: function stack consumes 1132 bytes:266 [nbytes=1132]
info->node.major = PHONE_MAJOR;
link->dev = &info->node;
ixj_get_serial(link, j);
link->state &= ~DEV_CONFIG_PENDING;
return;

Error --->
cs_failed:
cs_error(link->handle, last_fn, last_ret);
ixj_cs_release((u_long) link);
}
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/char/joystick/sidewinder.c:572:sw_connect: ERROR:VAR:572:572: function stack consumes 1093 bytes:572 [nbytes=1093]
unsigned char m = 1;
char comment[40];

comment[0] = 0;


Error --->
if (!(sw = kmalloc(sizeof(struct sw), GFP_KERNEL))) return;
memset(sw, 0, sizeof(struct sw));

gameport->private = sw;
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/cdrom.c:738:cdrom_number_of_slots: ERROR:VAR:738:738: suspicious var 'info' = 1032 bytes:738 [nbytes=1032]
*/
int cdrom_number_of_slots(struct cdrom_device_info *cdi)
{
int status;
int nslots = 1;

Error --->
struct cdrom_changer_info info;

cdinfo(CD_CHANGER, "entering cdrom_number_of_slots()\n");
/* cdrom_read_mech_status requires a valid value for capacity: */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/cdrom.c:780:cdrom_select_disc: ERROR:VAR:780:780: suspicious var 'info' = 1032 bytes:780 [nbytes=1032]
return cdi->ops->generic_packet(cdi, &cgc);
}

int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
{

Error --->
struct cdrom_changer_info info;
int curslot;
int ret;

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/cdrom.c:1526:cdrom_ioctl: ERROR:VAR:1526:1526: suspicious var 'info' = 1032 bytes:1526 [nbytes=1032]
cdi->options |= CDO_AUTO_CLOSE | CDO_AUTO_EJECT;
return 0;
}

case CDROM_MEDIA_CHANGED: {

Error --->
struct cdrom_changer_info info;

cdinfo(CD_DO_IOCTL, "entering CDROM_MEDIA_CHANGED\n");
if (!CDROM_CAN(CDC_MEDIA_CHANGED))
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/cdrom/cdrom.c:714:cdrom_slot_status: ERROR:VAR:714:714: suspicious var 'info' = 1032 bytes:714 [nbytes=1032]
return cdo->generic_packet(cdi, &cgc);
}

static int cdrom_slot_status(struct cdrom_device_info *cdi, int slot)
{

Error --->
struct cdrom_changer_info info;
int ret;

cdinfo(CD_CHANGER, "entering cdrom_slot_status()\n");
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/jffs2/compr_rtime.c:97:rtime_decompress: ERROR:VAR:97:97: suspicious var 'positions' = 1024 bytes:97 [nbytes=1024]


void rtime_decompress(unsigned char *data_in, unsigned char *cpage_out,
__u32 srclen, __u32 destlen)
{

Error --->
int positions[256];
int outpos = 0;
int pos=0;

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/nfs/nfsroot.c:238:root_nfs_name: ERROR:VAR:238:238: suspicious var 'buf' = 1024 bytes:238 [nbytes=1024]
/*
* Prepare the NFS data structure and parse all options.
*/
static int __init root_nfs_name(char *name)
{

Error --->
char buf[NFS_MAXPATHLEN];
char *cp;

/* Set some default values */
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/drivers/net/wan/cycx_x25.c:983:hex_dump: ERROR:VAR:983:983: suspicious var 'hex' = 1024 bytes:983 [nbytes=1024]
card->devname, cmd->command);
}
#ifdef CYCLOMX_X25_DEBUG
static void hex_dump(char *msg, unsigned char *p, int len)
{

Error --->
unsigned char hex[1024],
* phex = hex;

if (len >= (sizeof(hex) / 2))
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/fs/jffs2/compr_rtime.c:57:rtime_compress: ERROR:VAR:57:57: suspicious var 'positions' = 1024 bytes:57 [nbytes=1024]

/* _compress returns the compressed size, -1 if bigger */
int rtime_compress(unsigned char *data_in, unsigned char *cpage_out,
__u32 *sourcelen, __u32 *dstlen)
{

Error --->
int positions[256];
int outpos = 0;
int pos=0;

---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/arch/i386/kernel/nmi.c:48:check_nmi_watchdog: ERROR:VAR:48:48: suspicious var 'tmp' = 1024 bytes:48 [nbytes=1024]
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED

int __init check_nmi_watchdog (void)
{

Error --->
irq_cpustat_t tmp[NR_CPUS];
int j, cpu;

printk(KERN_INFO "testing NMI watchdog ... ");
---------------------------------------------------------
[BUG]
/u2/engler/mc/oses/linux/2.4.17/net/bridge/br_ioctl.c:86:br_ioctl_device: ERROR:VAR:86:86: suspicious var 'indices' = 1024 bytes:86 [nbytes=1024]
}

case BRCTL_GET_PORT_LIST:
{
int i;

Error --->
int indices[256];

for (i=0;i<256;i++)
indices[i] = 0;


2002-06-16 18:51:34

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Sun, 16 Jun 2002 [email protected] wrote:

> That's simply not true. Of the top of my head - /proc/self.
>
> Ah, yes - that has the string on stack.
> That changes my inventory into: all callers except two use a page.
> One (jffs2) uses a kmalloced area. One (/proc/self) has its data
> on the stack.
>
> Look, it's getting ridiculous.
>
> That is a counterproductive reply.
> You say "I tried to improve Linux three years ago and failed,
> so you can forget it - no improvement is possible".

No. What I'm saying is "your handwaving is getting ridiculous".

> Let us try. The basic object is a struct link_work that has dentry,
> nameidata, link, page and flags and a pointer to the next such struct.
> This is a reverse linked list: the thing we work on is in front,
> the tail of the list is the thing we originally started to resolve.
>
> The routine that does all this is
>
> int do_link_work(struct link_work **lw) {
> int err = 0;
>
> while((*lw)->link) {
> if (err == 0)
> err = do_link_item(lw);
> else
> do_bad_link_item(lw);
> }
> return err;
> }
>
> where do_link_item() has as duty to handle the next part
> of the pathname. That diminishes the work left, unless
> that next part was a symlink, in which case resolution
> of that is prepended to the list of work we have to do.
> If something is wrong, do_bad_link_item() only releases resources.
>
> Looks like a nice and clean setup.

"The process is iterative, so there is a loop somewhere; let's put
whatever body it might have into a separate function - see, no ugliness
there".

> Remains the question how to release the resources allocated
> by the filesystems in prepare_follow_link().

... and a couple of other questions, like, say it, how to deal with
handling of "alternative roots" (/usr/gnuemul/<foo>) or what will your
do_link_item() be.

> There are various possibilities. General ones: the filesystem
> provides a callback. Restricted ones: we ask the filesystem
> to have one page as the only resource. Kludgy ones: we allow
> some explicit short list, like page or kmalloc. Probably others.
>
> But maybe you are not interested in thinking about such things.
> You did it already.

Indeed I had. Look, either you have a decent solution or not. If you
want to try - by all means, go ahead, I'll be glad to see the current
situation improved. Write a patch and post it for review - it's not
exactly the rocket science. Obvious requirements:
* if current code resolves a pathname, replacement should do the same.
* if current code fails with something different from ELOOP - so
should the replacement (modulo things like ENOMEM, obviously)
* replacement code should not penalize the lookups that do not
encounter nested symlinks at all.
* resulting namei.c should not be uglier than it currently is
(and if some code migrates to new files - same for all these files combined).

Fair enough, I hope? Keep in mind that pathname resolution is one of the
hotspots, so "should not penalize..." part is quite serious. Write it
(starting at 2.5.<current> version) and post it for review. Then there
will be something to talk about; right now you are making very vague
proposals of reorganization of code you hadn't read through and saying that
such reorganization can be done with decent results...

2002-06-12 18:29:14

by Pavel Machek

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

Hi!

> # BUGs | File Name
> 4 | /drivers/cdrom.c
> 4 | /message/i2o_proc.c
> 3 | /net/airo.c
> 3 | /../inflate.c
> 2 | /fs/zlib.c
> 2 | /drivers/zlib.c
> 2 | /drivers/cpqfcTScontrol.c
~~~~~~~~~~~~~~~~~~~~~~~~~

Actually, 3 bugs, the name is so ugly that it is a bug, too :-).
Pavel
--
(about SSSCA) "I don't say this lightly. However, I really think that the U.S.
no longer is classifiable as a democracy, but rather as a plutocracy." --hpa

2002-06-12 19:11:39

by Nikita Danilov

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

Pavel Machek writes:
> Hi!
>
> > # BUGs | File Name
> > 4 | /drivers/cdrom.c
> > 4 | /message/i2o_proc.c
> > 3 | /net/airo.c
> > 3 | /../inflate.c
> > 2 | /fs/zlib.c
> > 2 | /drivers/zlib.c
> > 2 | /drivers/cpqfcTScontrol.c
> ~~~~~~~~~~~~~~~~~~~~~~~~~

By the way, gcc has -Wlarger-than-NNNN option to do such checks.

>
> Actually, 3 bugs, the name is so ugly that it is a bug, too :-).
> Pavel

Nikita.

2002-06-12 21:51:27

by Benjamin LaHaise

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Sun, Jun 09, 2002 at 08:56:30PM -0700, Dawson Engler wrote:
> Here are 37 errors where variables >= 1024 bytes are allocated on a function's
> stack.

Is it possible to get checker to determine the stack depth of a worst
case call chain (excluding interrupts)? I've found that deep call chains
are far more likely to cause stack overflows than short and bounded paths.

-ben

2002-06-12 22:26:57

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Wed, 12 Jun 2002, Benjamin LaHaise wrote:

> On Sun, Jun 09, 2002 at 08:56:30PM -0700, Dawson Engler wrote:
> > Here are 37 errors where variables >= 1024 bytes are allocated on a function's
> > stack.
>
> Is it possible to get checker to determine the stack depth of a worst
> case call chain (excluding interrupts)? I've found that deep call chains
> are far more likely to cause stack overflows than short and bounded paths.

Not realistic - we have a recursion through the ->follow_link(), and
a lot of stuff can be called from ->follow_link(). We _do_ have a
limit on depth of recursion here, but it won't be fun to deal with.

2002-06-12 22:38:55

by Benjamin LaHaise

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Wed, Jun 12, 2002 at 06:26:55PM -0400, Alexander Viro wrote:
> Not realistic - we have a recursion through the ->follow_link(), and
> a lot of stuff can be called from ->follow_link(). We _do_ have a
> limit on depth of recursion here, but it won't be fun to deal with.

Perfection isn't what I'm looking for, rather just an approximation.
Any tool would have to give up on non-trivial recursion, or have
additional rules imposed on the system. Checker seems to be growing
functionality in this area, so it seems like a useful feature request.

-ben
--
"You will be reincarnated as a toad; and you will be much happier."

2002-06-12 22:44:29

by Robert Love

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Wed, 2002-06-12 at 15:38, Benjamin LaHaise wrote:

> Perfection isn't what I'm looking for, rather just an approximation.
> Any tool would have to give up on non-trivial recursion, or have
> additional rules imposed on the system. Checker seems to be growing
> functionality in this area, so it seems like a useful feature request.

I do not want to give up on this idea, either... if the implementation
needs to be "hackish" or even explicitly ignore certain functions, so be
it. There is a _lot_ that can be done with detailed call chain analysis
-- the point you gave is one of many.

Checker already has some basic functionality here I suspect based on
what it is capable of reporting... imagine what more could be reported?

Robert Love

2002-06-12 22:54:56

by Tom Bradley

[permalink] [raw]
Subject: procfs documentation

Where can I find documentation that explains all the entries in procfs?

man 5 procfs is out of date and doesn't match.
Documentation/filesystems/proc.txt is incomplete.

I am writing a user-land GUI based program that allows user to easily read
the procfs but I can't find info on all the entries.

Tom


2002-06-13 00:20:50

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Wed, 12 Jun 2002, Benjamin LaHaise wrote:

> On Wed, Jun 12, 2002 at 06:26:55PM -0400, Alexander Viro wrote:
> > Not realistic - we have a recursion through the ->follow_link(), and
> > a lot of stuff can be called from ->follow_link(). We _do_ have a
> > limit on depth of recursion here, but it won't be fun to deal with.
>
> Perfection isn't what I'm looking for, rather just an approximation.
> Any tool would have to give up on non-trivial recursion, or have

... in which case it will be useless - anything callable from path_walk()
will be out of its scope and that's a fairly large part of VFS, filesystems,
VM and upper halves of block devices.

> additional rules imposed on the system. Checker seems to be growing
> functionality in this area, so it seems like a useful feature request.

Just be careful with that loop - (mutual) recursion through ->follow_link()
must be special-cased if you want anything interesting to come out of that.

2002-06-13 06:37:01

by Dawson Engler

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

>
> On Sun, Jun 09, 2002 at 08:56:30PM -0700, Dawson Engler wrote:
> > Here are 37 errors where variables >= 1024 bytes are allocated on a function's
> > stack.
>
> Is it possible to get checker to determine the stack depth of a worst
> case call chain (excluding interrupts)? I've found that deep call chains
> are far more likely to cause stack overflows than short and bounded paths.

Yeah, it's not that hard. The main problem is determining if recursive
loops are feasible. I'd released bugs from it before, but no one fixed any
so hadn't rerun it since.

2002-06-13 06:38:16

by Dawson Engler

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

> On Wed, 12 Jun 2002, Benjamin LaHaise wrote:
>
> > On Sun, Jun 09, 2002 at 08:56:30PM -0700, Dawson Engler wrote:
> > > Here are 37 errors where variables >= 1024 bytes are allocated on a function's
> > > stack.
> >
> > Is it possible to get checker to determine the stack depth of a worst
> > case call chain (excluding interrupts)? I've found that deep call chains
> > are far more likely to cause stack overflows than short and bounded paths.
>
> Not realistic - we have a recursion through the ->follow_link(), and
> a lot of stuff can be called from ->follow_link(). We _do_ have a
> limit on depth of recursion here, but it won't be fun to deal with.

You mean following function pointers is not realistic? Actually the
function pointers in linxu are pretty easy to deal with since, by
and large, they are set by static structure initialization and not
really fussed with afterwards.

2002-06-13 06:59:39

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Wed, 12 Jun 2002, Dawson Engler wrote:

> > Not realistic - we have a recursion through the ->follow_link(), and
> > a lot of stuff can be called from ->follow_link(). We _do_ have a
> > limit on depth of recursion here, but it won't be fun to deal with.
>
> You mean following function pointers is not realistic? Actually the
> function pointers in linxu are pretty easy to deal with since, by
> and large, they are set by static structure initialization and not
> really fussed with afterwards.

I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
->vfs_follow_link->link_path_walk) you will get infinite maximal depth
for everything that can be called by any of these functions. And that's
a _lot_ of stuff.

2002-06-13 08:31:08

by Helge Hafting

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

Alexander Viro wrote:
>
> On Wed, 12 Jun 2002, Benjamin LaHaise wrote:
>
> > On Wed, Jun 12, 2002 at 06:26:55PM -0400, Alexander Viro wrote:
> > > Not realistic - we have a recursion through the ->follow_link(), and
> > > a lot of stuff can be called from ->follow_link(). We _do_ have a
> > > limit on depth of recursion here, but it won't be fun to deal with.
> >
> > Perfection isn't what I'm looking for, rather just an approximation.
> > Any tool would have to give up on non-trivial recursion, or have
>
> ... in which case it will be useless - anything callable from path_walk()
> will be out of its scope and that's a fairly large part of VFS, filesystems,
> VM and upper halves of block devices.

The automated checker may use hard-coded limits for recursions with
limited depth. If follow_link stops after n iterations, tell
the checker about it and it will use that in its computations.

Helge Hafting

2002-06-13 11:17:40

by John Levon

[permalink] [raw]
Subject: Re: procfs documentation

On Wed, Jun 12, 2002 at 04:55:49PM -0600, Tom Bradley wrote:

> Where can I find documentation that explains all the entries in procfs?
>
> man 5 procfs is out of date and doesn't match.

Upgrade your man-pages.

> I am writing a user-land GUI based program that allows user to easily read
> the procfs but I can't find info on all the entries.

read the source

(and don't reply to irrelevant threads)

john

--
"All is change; all yields its place and goes"
- Euripides

2002-06-13 13:24:24

by Roger Larsson

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Thursday 13 June 2002 10.30, Helge Hafting wrote:
> Alexander Viro wrote:
> >
> > On Wed, 12 Jun 2002, Benjamin LaHaise wrote:
> >
> > > On Wed, Jun 12, 2002 at 06:26:55PM -0400, Alexander Viro wrote:
> > > > Not realistic - we have a recursion through the ->follow_link(), and
> > > > a lot of stuff can be called from ->follow_link(). We _do_ have a
> > > > limit on depth of recursion here, but it won't be fun to deal with.
> > >
> > > Perfection isn't what I'm looking for, rather just an approximation.
> > > Any tool would have to give up on non-trivial recursion, or have
> >
> > ... in which case it will be useless - anything callable from path_walk()
> > will be out of its scope and that's a fairly large part of VFS,
filesystems,
> > VM and upper halves of block devices.
>
> The automated checker may use hard-coded limits for recursions with
> limited depth. If follow_link stops after n iterations, tell
> the checker about it and it will use that in its computations.

Alexander Viro <[email protected]> wrote:
> (link_path_walk->do_follow_link->foofs_follow_link->
> vfs_follow_link->link_path_walk)

It would not need to follow the recursion at all.

A simple warning "vfs_follow_link makes a recursive call back
to link_path_walk, stack space needed for each recursion is N bytes"

/RogerL

2002-06-13 17:42:38

by Daniel Phillips

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Thursday 13 June 2002 08:59, Alexander Viro wrote:
> On Wed, 12 Jun 2002, Dawson Engler wrote:
>
> > > Not realistic - we have a recursion through the ->follow_link(), and
> > > a lot of stuff can be called from ->follow_link(). We _do_ have a
> > > limit on depth of recursion here, but it won't be fun to deal with.
> >
> > You mean following function pointers is not realistic? Actually the
> > function pointers in linxu are pretty easy to deal with since, by
> > and large, they are set by static structure initialization and not
> > really fussed with afterwards.
>
> I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> for everything that can be called by any of these functions. And that's
> a _lot_ of stuff.

Then at the point of recursion a dynamic check for stack space is
needed, and [checker]'s role would be to determine the deepest static
depth, to plug into the stack check. If we want to be sure about
stack integrity there isn't any way around this.

--
Daniel

2002-06-13 17:53:54

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Thu, 13 Jun 2002, Daniel Phillips wrote:

> > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > for everything that can be called by any of these functions. And that's
> > a _lot_ of stuff.
>
> Then at the point of recursion a dynamic check for stack space is
> needed, and [checker]'s role would be to determine the deepest static
> depth, to plug into the stack check. If we want to be sure about
> stack integrity there isn't any way around this.

Wrong. Check for stack _space_ will mean that maximal depth of nested
symlinks depends on syscall. Definitely not what you want to see.
There is a static limit (no more than 5 nested), but it must be
explicitly known to checker - deducing it from code is easy for a
human, but hopeless for anything automatic.

2002-06-13 17:56:32

by Andi Kleen

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

Alexander Viro <[email protected]> writes:
>
> I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> for everything that can be called by any of these functions. And that's
> a _lot_ of stuff.

Surely an analysis pass can detect recursive function chains and flag them
(e.g. the global IPA alias analysis pass in open64 does this)

-Andi

2002-06-13 18:26:56

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On 13 Jun 2002, Andi Kleen wrote:

> Alexander Viro <[email protected]> writes:
> >
> > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > for everything that can be called by any of these functions. And that's
> > a _lot_ of stuff.
>
> Surely an analysis pass can detect recursive function chains and flag them
> (e.g. the global IPA alias analysis pass in open64 does this)

Ugh... OK, let me try again:

One bit of apriory information needed to get anything interesting from
this analysis: there is a set of mutually recursive functions (see above)
and there is a limit (5) on the depth of recursion in that loop.

It has to be known to checker. Explicitly, since
a) automatically deducing it is not realistic
b) cutting off the stuff behind that loop will cut off a _lot_ of
things - both in filesystems and in VFS (and in block layer, while we are
at it).

I'm not saying that checker can't be used for that analysis - it can, but
naive approach (find recursive stuff and cut it off) will not be too
interesting. One of the main stumbling blocks - see above. With explicit
knowledge of that one the thing will be definitely very interesting - no
arguments here.

2002-06-13 18:46:33

by Daniel Phillips

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Thursday 13 June 2002 19:53, Alexander Viro wrote:
> On Thu, 13 Jun 2002, Daniel Phillips wrote:
>
> > > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > > for everything that can be called by any of these functions. And that's
> > > a _lot_ of stuff.
> >
> > Then at the point of recursion a dynamic check for stack space is
> > needed, and [checker]'s role would be to determine the deepest static
> > depth, to plug into the stack check. If we want to be sure about
> > stack integrity there isn't any way around this.
>
> Wrong. Check for stack _space_ will mean that maximal depth of nested
> symlinks depends on syscall. Definitely not what you want to see.
> There is a static limit (no more than 5 nested), but it must be
> explicitly known to checker - deducing it from code is easy for a
> human, but hopeless for anything automatic.

It's even hard to deduce the recursion in this case, and even if
checker is smart enough to spot it there's no way to know the
static requirement of out-of-tree filesystems.

Perhaps a BUG here is the right thing, in case the big chain of
assumptions here is inadvertently violated, in which case I'd much
rather have the system go down with a BUG than wig out in one of
the weird and wonderful ways typical of a stack overflow.

By the way:

327 /*
328 * This limits recursive symlink follows to 8, while
329 * limiting consecutive symlinks to 40.

Maybe:

327 /*
328 * This limits recursive symlink follows and consecutive symlinks.

--
Daniel

2002-06-13 19:01:51

by Andi Kleen

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Thu, Jun 13, 2002 at 08:26:54PM +0200, Alexander Viro wrote:
> Ugh... OK, let me try again:
>
> One bit of apriory information needed to get anything interesting from
> this analysis: there is a set of mutually recursive functions (see above)
> and there is a limit (5) on the depth of recursion in that loop.
>
> It has to be known to checker. Explicitly, since
> a) automatically deducing it is not realistic
> b) cutting off the stuff behind that loop will cut off a _lot_ of
> things - both in filesystems and in VFS (and in block layer, while we are
> at it).
>
> I'm not saying that checker can't be used for that analysis - it can, but
> naive approach (find recursive stuff and cut it off) will not be too

if you see all possible paths through the program as a tree which branches
for every decision then you only need to cut off the branches that are
actually pointing upward the tree again. This would still allow to follow
down into the callees of the recursive function because there should be
at least one path that is non recursive (if not Checker should definitely
complain ;)

e.g.
----<-----------------+
v |
IF TRUE RECURSION
-------+------ some path ----+
|
ELSE non recursive path
+-------------------------- other functions ---------->


Other functions can be still checked, you only need to prune the cycle.
I have no idea if checker's algorithms actually work like this, but I could
imagine that it would be one possible implementation.

-Andi

2002-06-13 21:50:34

by Dawson Engler

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

> On 13 Jun 2002, Andi Kleen wrote:
>
> > Alexander Viro <[email protected]> writes:
> > >
> > > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > > for everything that can be called by any of these functions. And that's
> > > a _lot_ of stuff.
> >
> > Surely an analysis pass can detect recursive function chains and flag them
> > (e.g. the global IPA alias analysis pass in open64 does this)
>
> Ugh... OK, let me try again:
>
> One bit of apriory information needed to get anything interesting from
> this analysis: there is a set of mutually recursive functions (see above)
> and there is a limit (5) on the depth of recursion in that loop.
>
> It has to be known to checker. Explicitly, since
> a) automatically deducing it is not realistic
> b) cutting off the stuff behind that loop will cut off a _lot_ of
> things - both in filesystems and in VFS (and in block layer, while we are
> at it).

We're all about jamming system specific information into the checking
extensions. It's usually just a few if statements. So to be clear: we
can simply assume that the loop
link_path_walk
->do_follow_link
->foofs_follow_link
->vfs_follow_link
->link_path_walk
can happen exactly 5 times?

In general such restrictions are interesting, since they concretely
show how having a way to include system-specific knowlege into checking
is useful. Are there any other such relationships? The other example
I know of is the do_page_fault (sp?) routine that can potentially be
invoked at very copy_*_user site that page faults. You need to know
this to detect certain classes of deadlock, but there's no way to
discern this path from the code without a priori knowlege.

2002-06-13 22:44:33

by Oliver Xymoron

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Thu, 13 Jun 2002, Dawson Engler wrote:

> > On 13 Jun 2002, Andi Kleen wrote:
> >
> > > Alexander Viro <[email protected]> writes:
> > > >
> > > > I mean that due to the loop (link_path_walk->do_follow_link->foofs_follow_link
> > > > ->vfs_follow_link->link_path_walk) you will get infinite maximal depth
> > > > for everything that can be called by any of these functions. And that's
> > > > a _lot_ of stuff.
> > >
> > > Surely an analysis pass can detect recursive function chains and flag them
> > > (e.g. the global IPA alias analysis pass in open64 does this)
> >
> > Ugh... OK, let me try again:
> >
> > One bit of apriory information needed to get anything interesting from
> > this analysis: there is a set of mutually recursive functions (see above)
> > and there is a limit (5) on the depth of recursion in that loop.
> >
> > It has to be known to checker. Explicitly, since
> > a) automatically deducing it is not realistic
> > b) cutting off the stuff behind that loop will cut off a _lot_ of
> > things - both in filesystems and in VFS (and in block layer, while we are
> > at it).
>
> We're all about jamming system specific information into the checking
> extensions. It's usually just a few if statements. So to be clear: we
> can simply assume that the loop
> link_path_walk
> ->do_follow_link
> ->foofs_follow_link
> ->vfs_follow_link
> ->link_path_walk
> can happen exactly 5 times?
>
> In general such restrictions are interesting, since they concretely
> show how having a way to include system-specific knowlege into checking
> is useful. Are there any other such relationships? The other example
> I know of is the do_page_fault (sp?) routine that can potentially be
> invoked at very copy_*_user site that page faults. You need to know
> this to detect certain classes of deadlock, but there's no way to
> discern this path from the code without a priori knowlege.

There are various flags for memory allocation that prevent it (usually)
from being recursive (or livelocking).

Though I'm not really convinced by Al's argument. I think if you do a
breadth-first traversal of the possible call tree, as you get past a
certain predicted stack depth, you'll be able to manually prune things
that aren't of interest. Very few things should be potentially recursive
or mutally recursive and any that are probably bear closer manual
scrutiny.

How's Checker do on passing through function pointers?

--
"Love the dolphins," she advised him. "Write by W.A.S.T.E.."

2002-06-14 00:05:45

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Thu, Jun 13, 2002 at 09:01:30PM +0200, Andi Kleen wrote:
> if you see all possible paths through the program as a tree which branches
> for every decision then you only need to cut off the branches that are
> actually pointing upward the tree again. This would still allow to follow
> down into the callees of the recursive function because there should be
> at least one path that is non recursive (if not Checker should definitely
> complain ;)
> e.g.
> ----<-----------------+
> v |
> IF TRUE RECURSION
> -------+------ some path ----+
> |
> ELSE non recursive path
> +-------------------------- other functions ---------->
> Other functions can be still checked, you only need to prune the cycle.
> I have no idea if checker's algorithms actually work like this, but I could
> imagine that it would be one possible implementation.

Why would you do it this way? AFAICT this is more naturally phrased as
cycle detection in a digraph.

Cheers,
Bill

2002-06-14 00:25:53

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Thu, 13 Jun 2002, Dawson Engler wrote:

> > It has to be known to checker. Explicitly, since
> > a) automatically deducing it is not realistic
> > b) cutting off the stuff behind that loop will cut off a _lot_ of
> > things - both in filesystems and in VFS (and in block layer, while we are
> > at it).
>
> We're all about jamming system specific information into the checking
> extensions. It's usually just a few if statements. So to be clear: we
> can simply assume that the loop
> link_path_walk
> ->do_follow_link
> ->foofs_follow_link
> ->vfs_follow_link
> ->link_path_walk
> can happen exactly 5 times?

static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
{
int err;
if (current->link_count >= 5)
goto loop;
...
current->link_count++;
...
err = dentry->d_inode->i_op->follow_link(dentry, nd);
current->link_count--;
return err;
loop:
path_release(nd);
return -ELOOP;
}

->link_count is modified only by the code above.

INIT_TASK has zero link_count.

new task inherits ->link_count of parent at the moment of do_fork()

It means that:
* at any moment ->link_count is between 0 and 5
* at any moment the call chain contains at most 6 instances of
do_follow_link()
* if there are 6 such instances then the last of them is either the
last element of chain or it is followed by path_release().

BTW, it shows a potential subtle problem - if we ever call do_fork()
while resolving a symlink (the only way that can happen is kernel_thread()
being called in process of symlink resolution) we will get a kernel thread
with limit for nested symlinks lower than 5. Proposed fix (both 2.4 and 2.5):

ed kernel/fork.c <<EOF
/swappable/a
p->link_count = 0;
.
w
EOF

Linus?

2002-06-14 10:06:20

by Helge Hafting

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

Roger Larsson wrote:

> > The automated checker may use hard-coded limits for recursions with
> > limited depth. If follow_link stops after n iterations, tell
> > the checker about it and it will use that in its computations.
>
> Alexander Viro <[email protected]> wrote:
> > (link_path_walk->do_follow_link->foofs_follow_link->
> > vfs_follow_link->link_path_walk)
>
> It would not need to follow the recursion at all.
>
> A simple warning "vfs_follow_link makes a recursive call back
> to link_path_walk, stack space needed for each recursion is N bytes"
>
That's all you can do with recursion of unknown depth, sure.

But the recursion here is known limited, and we want to know
"what is the deepest stack we can get into, following the
deepest call chain _after_ VFS recursed 5 levels of symlinks"
We know it won't recurse after that - but it might go
on calling x levels of non-recursive functions.

Hard-coding the limit for that particular call chain makes
sense as a lot of stuff may be called from it. Or similiar,
various stuff may pile up on the stack and _then_ call into VFS
and recurse to the limit.

Printing the hardcoded assumption is probably a good idea when
it is used to find some max depth - the kernel code might change
after all.


Helge Hafting

2002-06-16 00:48:35

by Andries E. Brouwer

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

> Not realistic - we have a recursion through the ->follow_link(), and
> a lot of stuff can be called from ->follow_link(). We _do_ have a
> limit on depth of recursion here, but it won't be fun to deal with.

It would not be impossibly difficult to remove the recursion altogether.

As far as I can see, the cycle essentially is
link_path_walk -> do_follow_link -> page_follow_link -> vfs_follow_link ->.

Now page_follow_link does very little after the vfs_follow_link call,
and the same holds for all other filesystem types.
Either foo_follow_link ends in
return vfs_follow_link(..);
or it does
err = vfs_follow_link(..);
if (something_to_free)
free_it;
return err;

With a little bit of polishing we could cut off all foo_follow_link
routines just before the vfs_follow_link call and make foo_follow_link
return the arguments it was going to call vfs_follow_link with.

Then of course the something_to_free must be freed by the caller.
What is it?

page_follow_link and nfs_follow_link have

if (page) {
kunmap(page);
page_cache_release(page);
}

jffs2_follow_link has

kfree(buf);

If that is done, there is no stack frame for foo_follow_link,
and in principle the three of link_path_walk, do_follow_link,
vfs_follow_link could be single routine. Today do_follow_link
is already inline.

Now symlink resolution can be entirely iterative.
The routine that does this works on a linked list of things
still to consider, and always works on the tail of the list.

Quite a lot of restructuring would be required to produce
a readable source in this style, but it doesn't look too difficult.
The result might even be more efficient.

Andries

2002-06-16 01:08:40

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Sun, 16 Jun 2002 [email protected] wrote:

> As far as I can see, the cycle essentially is
> link_path_walk -> do_follow_link -> page_follow_link -> vfs_follow_link ->.

[snip the obvious strategy that doesn't work]

> Now symlink resolution can be entirely iterative.

Wrong. That breaks for anything with ->follow_link() that can't be expressed
as a single lookup on some path.

For fsck sake, give the folks here some credit - strategy above is _not_
hard to think up and it had been discussed on l-k several times.

2002-06-16 07:48:29

by Andries E. Brouwer

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

> Wrong. That breaks for anything with ->follow_link() that
> can't be expressed as a single lookup on some path.

I don't know what interesting out-of-tree filesystems you
have in mind, but it looks like this would work for all
systems in the current tree.

> For fsck sake, give the folks here some credit

Funny to hear that from you.

Andries


P.S. Now that you exist again, let me repeat a question
from a few weeks ago. Many years ago I constructed kdev_t,
a pointer to a struct containing the things that lived in
the arrays and some useful functions like a function returning
the name. It was passed around as reference to the device.
You are replacing kdev_t by struct block device *, that I
consider as a refcounted reference to the device. Fine.
But kdev_t is created by the driver, at the moment the
device is detected, while struct block device * is created
at open() time. Thus, the question arises where you plan to
store permanent device data like size or sectorsize.

2002-06-16 08:36:52

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Sun, 16 Jun 2002 [email protected] wrote:

> > Wrong. That breaks for anything with ->follow_link() that
> > can't be expressed as a single lookup on some path.
>
> I don't know what interesting out-of-tree filesystems you
> have in mind, but it looks like this would work for all
> systems in the current tree.

Trivial counterexample: procfs.

> You are replacing kdev_t by struct block device *, that I
> consider as a refcounted reference to the device. Fine.
> But kdev_t is created by the driver, at the moment the
> device is detected, while struct block device * is created
> at open() time. Thus, the question arises where you plan to

It's a bit trickier than that - on both sides (driver might
modular and then the only thing that can guarantee its presence
is opened device; OTOH, struct block_device can stay around longer
than it's opened)

> store permanent device data like size or sectorsize.

Sector size is kept in the queue. Disk size - in per-disk objects
that are yet to be introduced in the tree.

Notice that your "permanent" data is not quite permanent - e.g.
information about partitioning can be lost (in all kernels since
2.0, if not earlier) at any moment when nobody has devices opened.
rmmod -a and there you go...

IMO correct approach to such data is that it's generated on demand
(as it is right now - again, consider modular driver; then we don't
see _anything_ about devices until somebody attempts to open one
of them and triggers module loading), cached at least until the things
are opened and forcibly invalidated by the driver when it goes away
or decide that it had lost the disk (e.g. rmmod of low-level SCSI
driver means that everything behind controllers of that type effectively
disappears).

IOW, we certainly should not forget everything upon the final close() -
instead we should leave invalidation to the driver or memory pressure.
And that includes the page cache of device - we need it to become
quiet on the final close so that no IO requests would be generated,
but we don't need it to disappear.

The next series of patches will make struct block_device keep the
information (->bd_op, page cache contents, etc.) past the final
close(). After that we can start moving the stuff in there without
breaking the warranties that exist in the current tree...

2002-06-16 10:00:27

by Andries E. Brouwer

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

On Sun, 16 Jun 2002 [email protected] wrote:

> > Wrong. That breaks for anything with ->follow_link() that
> > can't be expressed as a single lookup on some path.
>
> I don't know what interesting out-of-tree filesystems you
> have in mind, but it looks like this would work for all
> systems in the current tree.

Trivial counterexample: procfs.

That is trivially handled:

static inline int
do_follow_link(struct dentry *dentry, struct nameidata *nd) {
const char *link = NULL;
struct page *page = NULL;
int err;

...
err = dentry->d_inode->i_op->prepare_follow_link(dentry, nd, &link, &page);
if (nd->flags & FOLLOW_DONE)
nd->flags &= ~FOLLOW_DONE;
else if (err = 0)
err = __vfs_follow_link(nd, link);
if (page) {
kunmap(page);
page_cache_release(page);
}
...
return err;
}

You must come with more serious objections before I believe your
claim that this doesn't work.

----------
> You are replacing kdev_t by struct block device *, that I
> consider as a refcounted reference to the device. Fine.
> But kdev_t is created by the driver, at the moment the
> device is detected, while struct block device * is created
> at open() time. Thus, the question arises where you plan to
...
> store permanent device data like size or sectorsize.

Sector size is kept in the queue. Disk size - in per-disk objects
that are yet to be introduced in the tree.

Yes - it is these per-disk objects that I was referring to.
Recently I was polishing some SCSI code, and the next step
would have been unification with some IDE code that did precisely
the same things in an entirely different way. But in order to
remove this driver code I needed what I still think of as kdev_t
structs. So you will be introducing them after all. Good.

----------
Notice that your "permanent" data is not quite permanent - e.g.
information about partitioning can be lost (in all kernels since
2.0, if not earlier) at any moment when nobody has devices opened.
rmmod -a and there you go...

IMO correct approach to such data is that it's generated on demand
(as it is right now - again, consider modular driver; then we don't
see _anything_ about devices until somebody attempts to open one
of them and triggers module loading)

Yes, when the disk is removed, or when the driver is removed
it does not matter that information is lost.
But it is very bad to generate a partition table on each open.
Indeed, it is wrong.

User space can tell the kernel what the partitions are.
Opening a disk device must not destroy that information.
So some permanent info of the gendisk type is needed.

Andries

2002-06-16 10:33:59

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Sun, 16 Jun 2002 [email protected] wrote:

> On Sun, 16 Jun 2002 [email protected] wrote:
>
> > > Wrong. That breaks for anything with ->follow_link() that
> > > can't be expressed as a single lookup on some path.
> >
> > I don't know what interesting out-of-tree filesystems you
> > have in mind, but it looks like this would work for all
> > systems in the current tree.
>
> Trivial counterexample: procfs.
>
> That is trivially handled:

I've said "trivial counterexample" ;-)

> err = dentry->d_inode->i_op->prepare_follow_link(dentry, nd, &link, &page);
> if (nd->flags & FOLLOW_DONE)
> nd->flags &= ~FOLLOW_DONE;
> else if (err = 0)
> err = __vfs_follow_link(nd, link);
> if (page) {
> kunmap(page);
> page_cache_release(page);
> }
> ...
> return err;
> }
>
> You must come with more serious objections before I believe your
> claim that this doesn't work.

First of all, _that_ is still recursive. And it's not easy to deal with -
you need to release the object holding the link body (BTW, that can
be almost anything - page, inode, kmalloc'ed area, vmalloc'ed area, etc.)
after __vfs_follow_link() is done. And that means (at the very least)
a stack of such objects, along with the information about their nature.
Which exposes a lot of information about the filesystem guts - so much
that we are basically introducing a cookie created by your ->prepare_...()
and having a destructor of its own.

I'd implemented that variant about 3 years ago. And it got vetoed by Linus -
for a very good reason. It was too damn ugly, with more ugliness to come
as we add more filesystems.


> Notice that your "permanent" data is not quite permanent - e.g.
> information about partitioning can be lost (in all kernels since
> 2.0, if not earlier) at any moment when nobody has devices opened.
> rmmod -a and there you go...
>
> IMO correct approach to such data is that it's generated on demand
> (as it is right now - again, consider modular driver; then we don't
> see _anything_ about devices until somebody attempts to open one
> of them and triggers module loading)
>
> Yes, when the disk is removed, or when the driver is removed
> it does not matter that information is lost.
> But it is very bad to generate a partition table on each open.
> Indeed, it is wrong.

D'oh. Indeed it is wrong - not to mention anything else, if we keep it
over the lifetime of struct block_device, there is no possible reason
to recalculate it. Sure thing, it's cached. And stays cached until
we are either told that it needs to be reread (BLKRRPART or your ioctls)
or that device itself is gone or that we didn't have that device (or
any of its partitions) for a long time, all their page caches are evicted
from memory and memory pressure is so bad that we want to start dropping
ancient and long-unused struct block_device.

Normal case is that we read partition table once - on the first open().
Then it stays cached.

> User space can tell the kernel what the partitions are.

... And that's also cached.

> Opening a disk device must not destroy that information.

Sure - why would it? If we already know where partition is - why would
we bother rereading that stuff?

> So some permanent info of the gendisk type is needed.

FSVO "permanent". Basically, struct block_device of entire disk is
going to be a lot more long-living beast than it is right now...

2002-06-16 10:57:08

by Andries E. Brouwer

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

> > > Wrong. That breaks for anything with ->follow_link() that
> > > can't be expressed as a single lookup on some path.
>
> in-tree systems give no problems; procfs is trivially handled:
>
> err = dentry->d_inode->i_op->prepare_follow_link(dentry, nd, &link, &page);
> if (nd->flags & FOLLOW_DONE)
> nd->flags &= ~FOLLOW_DONE;
> else if (err = 0)
> err = __vfs_follow_link(nd, link);
> if (page) {
> kunmap(page);
> page_cache_release(page);
> }
> ...
> return err;
> }
>
> You must come with more serious objections before I believe your
> claim that this doesn't work.

First of all, _that_ is still recursive. And it's not easy to deal with -
you need to release the object holding the link body (BTW, that can
be almost anything - page, inode, kmalloc'ed area, vmalloc'ed area, etc.)
after __vfs_follow_link() is done. And that means (at the very least)
a stack of such objects, along with the information about their nature.

Yes. But in the current tree only the cases page and kmalloc'ed area
occur, and it is easy to transform the single occurrence of kmalloc'ed area
(jffs2) into a use of page.

Which exposes a lot of information about the filesystem guts.

So, nothing about the filesystem guts is needed, the above code
says it all: when we are done, release page.

Anyway, you cleverly change the topic. First it was impossible and wrong.
Now it is ugly. I even maintain that the new code could be more
beautiful than the old code. And more compact.

The typical prepare_follow_link routine would be

int page_prepare_follow_link(struct dentry *dentry, struct nameidata *nd,
const char **llink, struct page **ppage)
{
*llink = page_getlink(dentry, ppage);
return 0;
}

Andries

2002-06-16 11:38:02

by Alexander Viro

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17



On Sun, 16 Jun 2002 [email protected] wrote:

> First of all, _that_ is still recursive. And it's not easy to deal with -
> you need to release the object holding the link body (BTW, that can
> be almost anything - page, inode, kmalloc'ed area, vmalloc'ed area, etc.)
> after __vfs_follow_link() is done. And that means (at the very least)
> a stack of such objects, along with the information about their nature.
>
> Yes. But in the current tree only the cases page and kmalloc'ed area
> occur, and it is easy to transform the single occurrence of kmalloc'ed area
> (jffs2) into a use of page.

That's simply not true. Of the top of my head - /proc/self.

Look, it's getting ridiculous - you've already got a method that sometimes
follows link, sometimes leaves a bunch of stuff for callers to handle
(BTW, we'll have to duplicate that fun in another caller of ->follow_link()
and that, thanks to usual POSIX elegance, is already butt-ugly). You've
got (at the very least) 3 stacks of objects - dentry, link and page.
And that - saying "that happens to cover all current cases, except for one
but we can shoehorn it". Aside of the fact that statement is false,
you are making a lot of assumptions about what's natural for code - both
out-of-tree and in-tree (see above).

And then you'll need to shove said stacks into struct nameidata. Which
means either a static limit again or all sorts of fun with "allocate
externally if it gets bigger than...".

Face it, it _is_ ugly. Been there, done that - if you want to repeat
the exercise, more power to you, but it won't be any prettier.

Sure thing, everything is equivalent to Turing Machine and with sufficient
amount of brute force and kludges everything that can be done at all can be
done in any given way. In this particular case that amount will be
prohibitively painful. You don't believe me - try it yourself and see how
gross it will get...

2002-06-16 13:13:59

by Andries E. Brouwer

[permalink] [raw]
Subject: Re: [CHECKER] 37 stack variables >= 1K in 2.4.17

> First of all, _that_ is still recursive. And it's not easy
> to deal with - you need to release the object holding the link body
> (BTW, that can be almost anything - page, inode, kmalloc'ed area,
> vmalloc'ed area, etc.) after __vfs_follow_link() is done.
> And that means (at the very least) a stack of such objects,
> along with the information about their nature.
>
> Yes. But in the current tree only the cases page and kmalloc'ed area
> occur, and it is easy to transform the single occurrence of kmalloc'ed
> areas (jffs2) into a use of page.

That's simply not true. Of the top of my head - /proc/self.

Ah, yes - that has the string on stack.
That changes my inventory into: all callers except two use a page.
One (jffs2) uses a kmalloced area. One (/proc/self) has its data
on the stack.

Look, it's getting ridiculous.

That is a counterproductive reply.
You say "I tried to improve Linux three years ago and failed,
so you can forget it - no improvement is possible".

Of course improvement is possible. It always is.

Now we find that it is possible to change recursive symlink handling
into iterative handling. That is good, because the current limit of
5 is really a bit low. It bites every now and then.

Remains the question whether it can be done in a beautiful way.

Let us try. The basic object is a struct link_work that has dentry,
nameidata, link, page and flags and a pointer to the next such struct.
This is a reverse linked list: the thing we work on is in front,
the tail of the list is the thing we originally started to resolve.

The routine that does all this is

int do_link_work(struct link_work **lw) {
int err = 0;

while((*lw)->link) {
if (err == 0)
err = do_link_item(lw);
else
do_bad_link_item(lw);
}
return err;
}

where do_link_item() has as duty to handle the next part
of the pathname. That diminishes the work left, unless
that next part was a symlink, in which case resolution
of that is prepended to the list of work we have to do.
If something is wrong, do_bad_link_item() only releases resources.

Looks like a nice and clean setup.
Remains the question how to release the resources allocated
by the filesystems in prepare_follow_link().
There are various possibilities. General ones: the filesystem
provides a callback. Restricted ones: we ask the filesystem
to have one page as the only resource. Kludgy ones: we allow
some explicit short list, like page or kmalloc. Probably others.

But maybe you are not interested in thinking about such things.
You did it already.

Andries