Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id ; Wed, 17 Oct 2001 05:11:27 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id ; Wed, 17 Oct 2001 05:11:18 -0400 Received: from mail.loewe-komp.de ([62.156.155.230]:49159 "EHLO mail.loewe-komp.de") by vger.kernel.org with ESMTP id ; Wed, 17 Oct 2001 05:11:14 -0400 Message-ID: <3BCD4C0C.DF5D5C0@loewe-komp.de> Date: Wed, 17 Oct 2001 11:14:52 +0200 From: Peter =?iso-8859-1?Q?W=E4chtler?= Organization: LOEWE. Hannover X-Mailer: Mozilla 4.76 [de] (X11; U; Linux 2.4.9-ac3 i686) X-Accept-Language: de, en MIME-Version: 1.0 To: lkml Subject: NFS related Oops in 2.4.[39]-xfs Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org noisy:/usr/src/linux # uname -a Linux noisy 2.4.3-XFS #8 SMP Sam Mai 19 18:21:36 CEST 2001 i686 unknown dual board with just one processor /usr/local and /home on LogicalVolumes tried also 2.4.9-XFS (but see below) ------------ oops ------------- Warning (compare_maps): ksyms_base symbol __rta_fill_R__ver___rta_fill not found in System.map. 
Ignoring ksyms_base entry [other warnings stripped] Oops: 0000 CPU: 0 EIP: 0010: Using defaults from ksymoops -t elf32-i386 -a i386 EFLAGS: 00010246 eax: 00000000 ebx: 00000000 ecx: cff805b8 edx: 00000010 esi: ca018260 edi: ca0182e0 ebp: cb9e5800 esp: cb9edeec ds: 0018 es: 0018 ss: 0018 Stack: 0100708b ca018360 c016b7d8 ca0182e0 00000002 cba07000 cb9efc00 cb9e5800 ce078bc4 ca018360 00000000 ca018360 c016bb70 ce078a80 0102a048 00000005 Call Trace: c0169b73> Code: 8b 40 10 39 d0 74 21 8d 58 c8 39 f3 75 06 8b 5a 04 83 c3 c8 >>EIP; c016b3c4 <===== Trace; c01bb7d8 Trace; c016bb70 Trace; c016a0b5 Code; c016b3c4 00000000 <_EIP>: Code; c016b3c4 <===== 0: 8b 40 10 mov 0x10(%eax),%eax <===== Code; c016b3c7 3: 39 d0 cmp %edx,%eax Code; c016b3c9 5: 74 21 je 28 <_EIP+0x28> c016b3ec Code; c016b3cb 7: 8d 58 c8 lea 0xffffffc8(%eax),%ebx Code; c016b3ce a: 39 f3 cmp %esi,%ebx Code; c016b3d0 c: 75 06 jne 14 <_EIP+0x14> c016b3d8 Code; c016b3d2 e: 8b 5a 04 mov 0x4(%edx),%ebx Code; c016b3d5 11: 83 c3 c8 add $0xffffffc8,%ebx 31 warnings issued. Results may not be reliable. ------------ oops ------------- Why do I get all those warnings about symbol mismatch? The code causing the Oops: struct dentry *nfsd_findparent(struct dentry *child) { struct dentry *tdentry, *pdentry; tdentry = d_alloc(child, &(const struct qstr) {"..", 2, 0}); if (!tdentry) return ERR_PTR(-ENOMEM); /* I'm going to assume that if the returned dentry is different, then * it is well connected. But nobody returns different dentrys do they? */ pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); d_drop(tdentry); /* we never want ".." hashed */ if (!pdentry) { /* I don't want to return a ".." dentry. 
* I would prefer to return an unconnected "IS_ROOT" dentry, * though a properly connected dentry is even better */ /* if first or last of alias list is not tdentry, use that * else make a root dentry */ struct list_head *aliases = &tdentry->d_inode->i_dentry; spin_lock(&dcache_lock); if (aliases->next != aliases) { <=========== CRASH !!!!!!!!!!!!!!!!!!!!!!! pdentry = list_entry(aliases->next, struct dentry, d_alias); if (pdentry == tdentry) pdentry = list_entry(aliases->prev, struct dentry, d_alias); if (pdentry == tdentry) pdentry = NULL; if (pdentry) dget_locked(pdentry); } spin_unlock(&dcache_lock); if (pdentry == NULL) { pdentry = d_alloc_root(igrab(tdentry->d_inode)); if (pdentry) { pdentry->d_flags |= DCACHE_NFSD_DISCONNECTED; d_rehash(pdentry); } } if (pdentry == NULL) pdentry = ERR_PTR(-ENOMEM); } dput(tdentry); /* it is not hashed, it will be discarded */ return pdentry; } (gdb) disass nfsd_findparent Dump of assembler code for function nfsd_findparent: 0xc016b34c : push %esi 0xc016b34d : push %ebx 0xc016b34e : mov 0xc(%esp,1),%ebx 0xc016b352 : push $0xc02ce724 0xc016b357 : push %ebx 0xc016b358 : call 0xc0144608 0xc016b35d : mov %eax,%esi 0xc016b35f : add $0x8,%esp tdentry = d_alloc(child, &(const struct qstr) {"..", 2, 0}); 0xc016b362 : test %esi,%esi if (!tdentry) 0xc016b364 : jne 0xc016b370 0xc016b366 : mov $0xfffffff4,%eax 0xc016b36b : jmp 0xc016b431 return ERR_PTR(-ENOMEM); 0xc016b370 : mov 0x8(%ebx),%eax 0xc016b373 : mov 0x84(%eax),%edx 0xc016b379 : push %esi 0xc016b37a : push %eax 0xc016b37b : mov 0x4(%edx),%eax 0xc016b37e : call *%eax 0xc016b380 : mov %eax,%ebx 0xc016b382 : add $0x8,%esp pdentry = child->d_inode->i_op->lookup(child->d_inode, tdentry); static __inline__ void d_drop(struct dentry * dentry) { spin_lock(&dcache_lock); list_del(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_hash); spin_unlock(&dcache_lock); } 0xc016b385 : lock decb 0xc0329898 0xc016b38c : js 0xc02aefa4 0xc016b392 : lea 0x18(%esi),%eax 0xc016b395 : mov 0x4(%eax),%ecx 
0xc016b398 : mov 0x18(%esi),%edx 0xc016b39b : mov %ecx,0x4(%edx) 0xc016b39e : mov %edx,(%ecx) 0xc016b3a0 : mov %eax,0x18(%esi) 0xc016b3a3 : mov %eax,0x1c(%esi) 0xc016b3a6 : movb $0x1,0xc0329898 d_drop(tdentry); /* we never want ".." hashed */ 0xc016b3ad : test %ebx,%ebx 0xc016b3af : jne 0xc016b426 if (!pdentry) { 0xc016b3b1 : mov 0x8(%esi),%eax 0xc016b3b4 : lea 0x10(%eax),%edx // edx holds aliases struct list_head *aliases = &tdentry->d_inode->i_dentry; // is tdentry->d_inode->i_dentry not valid anymore? aliases gets NULL 0xc016b3b7 : lock decb 0xc0329898 0xc016b3be : js 0xc02aefb4 spin_lock(&dcache_lock); 0xc016b3c4 : mov 0x10(%eax),%eax <===================== CRASH !!! 0xc016b3c7 : cmp %edx,%eax // eax holds aliases->next if (aliases->next != aliases) { 0xc016b3c9 : je 0xc016b3ec 0xc016b3cb : lea 0xffffffc8(%eax),%ebx pdentry = list_entry(aliases->next, struct dentry, d_alias); 0xc016b3ce : cmp %esi,%ebx 0xc016b3d0 : jne 0xc016b3d8 0xc016b3d2 : mov 0x4(%edx),%ebx 0xc016b3d5 : add $0xffffffc8,%ebx 0xc016b3d8 : xor %eax,%eax 0xc016b3da : cmp %esi,%ebx 0xc016b3dc : cmove %eax,%ebx 0xc016b3df : test %ebx,%ebx 0xc016b3e1 : je 0xc016b3ec 0xc016b3e3 : push %ebx 0xc016b3e4 : call 0xc0144080 0xc016b3e9 : add $0x4,%esp if (pdentry) dget_locked(pdentry); 0xc016b3ec : movb $0x1,0xc0329898 0xc016b3f3 : test %ebx,%ebx 0xc016b3f5 : jne 0xc016b41c spin_unlock(&dcache_lock); 0xc016b3f7 : mov 0x8(%esi),%eax 0xc016b3fa : push %eax 0xc016b3fb : call 0xc0145ee8 0xc016b400 : push %eax 0xc016b401 : call 0xc01447f4 0xc016b406 : mov %eax,%ebx 0xc016b408 : add $0x8,%esp pdentry = d_alloc_root(igrab(tdentry->d_inode)); 0xc016b40b : test %ebx,%ebx 0xc016b40d : je 0xc016b41c 0xc016b40f : orb $0x4,0x4(%ebx) 0xc016b413 : push %ebx 0xc016b414 : call 0xc0144ab4 0xc016b419 : add $0x4,%esp 0xc016b41c : mov $0xfffffff4,%eax 0xc016b421 : test %ebx,%ebx 0xc016b423 : cmove %eax,%ebx 0xc016b426 : push %esi } /* if (!pdentry) */ 0xc016b427 : call 0xc0143e90 0xc016b42c : mov %ebx,%eax 0xc016b42e : add 
$0x4,%esp 0xc016b431 : pop %ebx 0xc016b432 : pop %esi 0xc016b433 : ret End of assembler dump. (gdb) 2.4.3-xfs is compiled with gcc 2.95.2 2.4.9-XFS is compiled with gcc 2.91.6 After the crash I tried to reboot, but it quickly failed again. reboot system boot 2.4.3-XFS Mon Oct 15 18:20 (21:49) reboot system boot 2.4.3-XFS Mon Oct 15 18:17 (21:52) reboot system boot 2.4.3-XFS Mon Oct 15 18:13 (21:57) reboot system boot 2.4.3-XFS Thu Oct 11 16:51 (4+23:18) reboot system boot 2.4.3-XFS Thu Oct 11 16:32 (4+23:37) reboot system boot 2.4.3-XFS Thu Oct 11 16:28 (4+23:41) reboot system boot 2.4.3-XFS Thu Oct 11 16:23 (4+23:46) reboot system boot 2.4.3-XFS Thu Oct 11 16:12 (4+23:57) reboot system boot 2.4.3-XFS Thu Oct 11 15:05 (5+01:04) reboot system boot 2.4.9-xfs Thu Oct 4 17:02 (11+23:08) reboot system boot 2.4.9-xfs Thu Oct 4 16:54 (11+23:15) reboot system boot 2.4.3-XFS Thu Oct 4 16:47 (11+23:23) reboot system boot 2.4.9-xfs Thu Oct 4 16:39 (11+23:31) reboot system boot 2.4.9-xfs Thu Oct 4 16:14 (11+23:55) To recover I had to unplug/powerdown all connected clients. :-( Uhh. I think 2.4.9 crashed at the same instructions. But I can't reproduce it on demand (hey, in the meantime we can work!). Seeing the NFS related changes in 2.4.10: should I upgrade? We use only NFSv2. Is NFSv3 more stable, if that matters here? The machine with 2.4.3 was up for several months - with light load. Now the number of crashes (with 6 NFS clients using /home + cvs) went up. We do mount our compile environment in a strange way: NOTE: /server is a symlink to /usr/local/export # See exports(5) for a description. # This file contains a list of all directories exported to other computers. # It is used by rpc.nfsd and rpc.mountd. 
/opt/xxx *.xxx(ro) /home *.xxx(rw) /tmp *.xxx(rw) /usr/local/export *.xxx(rw) /etc/fstab devserv:/server/compileenv/lib/lib \ /compenv/xxx/lib nfs ro,auto 0 0 devserv:/server/compileenv/usr_lib/usr/lib \ /compenv/xxx/usr_lib nfs ro,auto 0 0 devserv:/server/compileenv/gcc-lib \ /compenv/xxx/gcc-lib nfs ro,exec,auto 0 0 devserv:/server/compileenv/usr_include/usr/include \ /compenv/xxx/usr_include nfs ro,auto 0 0 devserv:/server/compileenv/usr_linux/usr/linux/include \ /compenv/xxx/usr_linux_include nfs ro,auto 0 0 Would it help to mount a single /usr/local/export and work with a symlink tree (as I actually do with autofs)? What can cause the "invalid" dentries? Did someone remove some files or were some dentries unhashed because of dcache growth? As a workaround I will do: struct list_head *aliases = &tdentry->d_inode->i_dentry; if (aliases && (aliases->next != aliases) ) { spin_lock(&dcache_lock); pdentry = list_entry(aliases->next, struct dentry, d_alias); if (pdentry == tdentry) pdentry = list_entry(aliases->prev, struct dentry, d_alias); if (pdentry == tdentry) pdentry = NULL; if (pdentry) dget_locked(pdentry); spin_unlock(&dcache_lock); } else pdentry=NULL; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/