Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id ; Wed, 17 Oct 2001 05:31:11 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id ; Wed, 17 Oct 2001 05:31:03 -0400 Received: from rj.sgi.com ([204.94.215.100]:8912 "EHLO rj.sgi.com") by vger.kernel.org with ESMTP id ; Wed, 17 Oct 2001 05:30:48 -0400 Message-Id: <200110170928.f9H9SsP07618@jen.americas.sgi.com> X-Mailer: exmh version 2.2 06/23/2000 with nmh-1.0.4 To: Peter =?iso-8859-1?Q?W=E4chtler?= cc: lkml Subject: Re: NFS related Oops in 2.4.[39]-xfs In-Reply-To: Message from Peter =?iso-8859-1?Q?W=E4chtler?= of "Wed, 17 Oct 2001 11:14:52 +0200." <3BCD4C0C.DF5D5C0@loewe-komp.de> Date: Wed, 17 Oct 2001 04:28:54 -0500 From: Steve Lord Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Where did you get your kernel (the 2.4.9 version, that is)? This problem sounds familiar, but I am pretty sure we fixed this case in XFS somewhere between 2.4.3 and 2.4.9. Steve > noisy:/usr/src/linux # uname -a > Linux noisy 2.4.3-XFS #8 SMP Sam Mai 19 18:21:36 CEST 2001 i686 unknown > > dual board with just one processor > /usr/local and /home on LogicalVolumes > tried also 2.4.9-XFS (but see below) > > ------------ oops ------------- > Warning (compare_maps): ksyms_base symbol __rta_fill_R__ver___rta_fill not fo > und in > System.map. 
Ignoring ksyms_base entry > [other warnings stripped] > Oops: 0000 > CPU: 0 > EIP: 0010: > Using defaults from ksymoops -t elf32-i386 -a i386 > EFLAGS: 00010246 > eax: 00000000 ebx: 00000000 ecx: cff805b8 edx: 00000010 > esi: ca018260 edi: ca0182e0 ebp: cb9e5800 esp: cb9edeec > ds: 0018 es: 0018 ss: 0018 > Stack: 0100708b ca018360 c016b7d8 ca0182e0 00000002 cba07000 cb9efc00 cb9e580 > 0 > ce078bc4 ca018360 00000000 ca018360 c016bb70 ce078a80 0102a048 0000000 > 5 > Call Trace: c0169b73> < > c01054c4> > Code: 8b 40 10 39 d0 74 21 8d 58 c8 39 f3 75 06 8b 5a 04 83 c3 c8 > > >>EIP; c016b3c4 <===== > Trace; c01bb7d8 > Trace; c016bb70 > Trace; c016a0b5 > Code; c016b3c4 > 00000000 <_EIP>: > Code; c016b3c4 <===== > 0: 8b 40 10 mov 0x10(%eax),%eax <===== > Code; c016b3c7 > 3: 39 d0 cmp %edx,%eax > Code; c016b3c9 > 5: 74 21 je 28 <_EIP+0x28> c016b3ec rent+a0/e8> > Code; c016b3cb > 7: 8d 58 c8 lea 0xffffffc8(%eax),%ebx > Code; c016b3ce > a: 39 f3 cmp %esi,%ebx > Code; c016b3d0 > c: 75 06 jne 14 <_EIP+0x14> c016b3d8 rent+8c/e8> > Code; c016b3d2 > e: 8b 5a 04 mov 0x4(%edx),%ebx > Code; c016b3d5 > 11: 83 c3 c8 add $0xffffffc8,%ebx > > > 31 warnings issued. Results may not be reliable. > ------------ oops ------------- > > Why do I get all those warnings about symbol mismatch? > > > The code causing the Oops: > > struct dentry *nfsd_findparent(struct dentry *child) > { > struct dentry *tdentry, *pdentry; > tdentry = d_alloc(child, &(const struct qstr) {"..", 2, 0}); > if (!tdentry) > return ERR_PTR(-ENOMEM); > > /* I'm going to assume that if the returned dentry is different, then > * it is well connected. But nobody returns different dentrys do the > y? > */ > pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); > d_drop(tdentry); /* we never want ".." hashed */ > if (!pdentry) { > /* I don't want to return a ".." dentry. 
> * I would prefer to return an unconnected "IS_ROOT" dentry, > * though a properly connected dentry is even better > */ > /* if first or last of alias list is not tdentry, use that > * else make a root dentry > */ > struct list_head *aliases = &tdentry->d_inode->i_dentry; > spin_lock(&dcache_lock); > if (aliases->next != aliases) { <=========== CRASH > !!!!!!!!!!!!!!!!!!!!!!! > pdentry = list_entry(aliases->next, struct dentry, d_ > alias); > if (pdentry == tdentry) > pdentry = list_entry(aliases->prev, struct de > ntry, > d_alias); > if (pdentry == tdentry) > pdentry = NULL; > if (pdentry) dget_locked(pdentry); > } > spin_unlock(&dcache_lock); > if (pdentry == NULL) { > pdentry = d_alloc_root(igrab(tdentry->d_inode)); > if (pdentry) { > pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; > d_rehash(pdentry); > } > } > if (pdentry == NULL) > pdentry = ERR_PTR(-ENOMEM); > } > dput(tdentry); /* it is not hashed, it will be discarded */ > return pdentry; > } > > > (gdb) disass nfsd_findparent > Dump of assembler code for function nfsd_findparent: > 0xc016b34c : push %esi > 0xc016b34d : push %ebx > 0xc016b34e : mov 0xc(%esp,1),%ebx > 0xc016b352 : push $0xc02ce724 > 0xc016b357 : push %ebx > 0xc016b358 : call 0xc0144608 > 0xc016b35d : mov %eax,%esi > 0xc016b35f : add $0x8,%esp > tdentry = d_alloc(child, &(const struct qstr) {"..", 2, 0}); > > 0xc016b362 : test %esi,%esi > if (!tdentry) > 0xc016b364 : jne 0xc016b370 > > 0xc016b366 : mov $0xfffffff4,%eax > 0xc016b36b : jmp 0xc016b431 9> > return ERR_PTR(-ENOMEM); > > > 0xc016b370 : mov 0x8(%ebx),%eax > 0xc016b373 : mov 0x84(%eax),%edx > 0xc016b379 : push %esi > 0xc016b37a : push %eax > 0xc016b37b : mov 0x4(%edx),%eax > 0xc016b37e : call *%eax > 0xc016b380 : mov %eax,%ebx > 0xc016b382 : add $0x8,%esp > pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); > > static __inline__ void d_drop(struct dentry * dentry) > { > spin_lock(&dcache_lock); > list_del(&dentry->d_hash); > INIT_LIST_HEAD(&dentry->d_hash); > 
spin_unlock(&dcache_lock); > } > > 0xc016b385 : lock decb 0xc0329898 > 0xc016b38c : js 0xc02aefa4 > 0xc016b392 : lea 0x18(%esi),%eax > 0xc016b395 : mov 0x4(%eax),%ecx > 0xc016b398 : mov 0x18(%esi),%edx > 0xc016b39b : mov %ecx,0x4(%edx) > 0xc016b39e : mov %edx,(%ecx) > 0xc016b3a0 : mov %eax,0x18(%esi) > 0xc016b3a3 : mov %eax,0x1c(%esi) > 0xc016b3a6 : movb $0x1,0xc0329898 > > d_drop(tdentry); /* we never want ".." hashed */ > > > 0xc016b3ad : test %ebx,%ebx > 0xc016b3af : jne 0xc016b426 8> > if (!pdentry) { > > 0xc016b3b1 : mov 0x8(%esi),%eax > 0xc016b3b4 : lea 0x10(%eax),%edx // edx holds > aliases > struct list_head *aliases = &tdentry->d_inode->i_dentry; > > // is tdentry->d_inode->i_dentry not valid anymore? aliases ge > ts NULL > > > 0xc016b3b7 : lock decb 0xc0329898 > 0xc016b3be : js 0xc02aefb4 > spin_lock(&dcache_lock); > > 0xc016b3c4 : mov 0x10(%eax),%eax <========== > =========== > CRASH !!! > 0xc016b3c7 : cmp %edx,%eax // eax holds alias > es->next > if (aliases->next != aliases) { > > 0xc016b3c9 : je 0xc016b3ec 0> > 0xc016b3cb : lea 0xffffffc8(%eax),%ebx > pdentry = list_entry(aliases->next, struct dentry, d_ > alias); > > 0xc016b3ce : cmp %esi,%ebx > 0xc016b3d0 : jne 0xc016b3d8 0> > 0xc016b3d2 : mov 0x4(%edx),%ebx > 0xc016b3d5 : add $0xffffffc8,%ebx > > 0xc016b3d8 : xor %eax,%eax > 0xc016b3da : cmp %esi,%ebx > 0xc016b3dc : cmove %eax,%ebx > 0xc016b3df : test %ebx,%ebx > 0xc016b3e1 : je 0xc016b3ec 0> > 0xc016b3e3 : push %ebx > 0xc016b3e4 : call 0xc0144080 > 0xc016b3e9 : add $0x4,%esp > if (pdentry) dget_locked(pdentry); > > 0xc016b3ec : movb $0x1,0xc0329898 > 0xc016b3f3 : test %ebx,%ebx > 0xc016b3f5 : jne 0xc016b41c 8> > spin_unlock(&dcache_lock); > > 0xc016b3f7 : mov 0x8(%esi),%eax > 0xc016b3fa : push %eax > 0xc016b3fb : call 0xc0145ee8 > 0xc016b400 : push %eax > 0xc016b401 : call 0xc01447f4 > 0xc016b406 : mov %eax,%ebx > 0xc016b408 : add $0x8,%esp > pdentry = d_alloc_root(igrab(tdentry->d_inode)); > > 0xc016b40b : test %ebx,%ebx > 0xc016b40d : je 
0xc016b41c 8> > 0xc016b40f : orb $0x4,0x4(%ebx) > 0xc016b413 : push %ebx > 0xc016b414 : call 0xc0144ab4 > 0xc016b419 : add $0x4,%esp > 0xc016b41c : mov $0xfffffff4,%eax > 0xc016b421 : test %ebx,%ebx > 0xc016b423 : cmove %eax,%ebx > 0xc016b426 : push %esi > } /* if (!pdentry) */ > 0xc016b427 : call 0xc0143e90 > 0xc016b42c : mov %ebx,%eax > 0xc016b42e : add $0x4,%esp > 0xc016b431 : pop %ebx > 0xc016b432 : pop %esi > 0xc016b433 : ret > End of assembler dump. > (gdb) > > > 2.4.3-xfs is compiled with gcc 2.95.2 > 2.4.9-XFS is compiled with gcc 2.91.6 > > After the crash I tried to reboot, but it quickly failed again. > reboot system boot 2.4.3-XFS Mon Oct 15 18:20 (21:49) > reboot system boot 2.4.3-XFS Mon Oct 15 18:17 (21:52) > reboot system boot 2.4.3-XFS Mon Oct 15 18:13 (21:57) > > reboot system boot 2.4.3-XFS Thu Oct 11 16:51 (4+23:18) > reboot system boot 2.4.3-XFS Thu Oct 11 16:32 (4+23:37) > reboot system boot 2.4.3-XFS Thu Oct 11 16:28 (4+23:41) > reboot system boot 2.4.3-XFS Thu Oct 11 16:23 (4+23:46) > reboot system boot 2.4.3-XFS Thu Oct 11 16:12 (4+23:57) > reboot system boot 2.4.3-XFS Thu Oct 11 15:05 (5+01:04) > > reboot system boot 2.4.9-xfs Thu Oct 4 17:02 (11+23:08) > reboot system boot 2.4.9-xfs Thu Oct 4 16:54 (11+23:15) > reboot system boot 2.4.3-XFS Thu Oct 4 16:47 (11+23:23) > reboot system boot 2.4.9-xfs Thu Oct 4 16:39 (11+23:31) > reboot system boot 2.4.9-xfs Thu Oct 4 16:14 (11+23:55) > > > To recover I had to unplug/powerdown all connected clients. :-( Uhh. > I think 2.4.9 crashed at the same instructions. But I can't reproduce > it on demand (hey, in the meantime we can work!). > Seeing the NFS related changes in 2.4.10: should I upgrade? > We use only NFSv2. Is NFSv3 more stable, if that matters here? > > The machine with 2.4.3 was up for several months - with light load. > Now the number of crashes (with 6 NFS clients using /home + cvs) went up. 
> > We do mount our compile environment in a strange way: > > NOTE: /server is a symlink to /usr/local/export > # See exports(5) for a description. > # This file contains a list of all directories exported to other computers. > # It is used by rpc.nfsd and rpc.mountd. > /opt/xxx *.xxx(ro) > /home *.xxx(rw) > /tmp *.xxx(rw) > /usr/local/export *.xxx(rw) > > > /etc/fstab > devserv:/server/compileenv/lib/lib \ > /compenv/xxx/lib nfs ro,auto 0 0 > devserv:/server/compileenv/usr_lib/usr/lib \ > /compenv/xxx/usr_lib nfs ro,auto 0 0 > devserv:/server/compileenv/gcc-lib \ > /compenv/xxx/gcc-lib nfs ro,exec,auto 0 0 > devserv:/server/compileenv/usr_include/usr/include \ > /compenv/xxx/usr_include nfs ro,auto 0 0 > devserv:/server/compileenv/usr_linux/usr/linux/include \ > /compenv/xxx/usr_linux_include nfs ro,auto 0 0 > > Would it help to mount a single /usr/local/export and work with a > symlink tree (as I actually do with autofs)? > What can cause the "invalid" dentries? Did someone remove some files > or were some dentries unhashed because of dcache growth? 
> > As workaround I will do: > > struct list_head *aliases = &tdentry->d_inode->i_dentry; > > if (aliases && (aliases->next != aliases) ) { > spin_lock(&dcache_lock); > pdentry = list_entry(aliases->next, struct dentry, d_ > alias); > if (pdentry == tdentry) > pdentry = list_entry(aliases->prev, struct de > ntry, > d_alias); > if (pdentry == tdentry) > pdentry = NULL; > if (pdentry) dget_locked(pdentry); > spin_unlock(&dcache_lock); > } else > pdentry=NULL; > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/