2007-02-28 20:13:34

by Adam Litke

[permalink] [raw]
Subject: Kernel Oops with shm namespace cleanups

Hey. While testing 2.6.21-rc2 with libhugetlbfs, the shm-fork test case
causes the kernel to oops. To reproduce: Execute 'make check' in the
latest libhugetlbfs source on a 2.6.21-rc2 kernel with 100 huge pages
allocated. Using fewer huge pages will likely also trigger the oops.
Libhugetlbfs can be downloaded from:
http://libhugetlbfs.ozlabs.org/snapshots/libhugetlbfs-dev-20070228.tar.gz

I have collected the following information:

bc56bba8f31bd99f350a5ebfd43d50f411b620c7 is first bad commit
commit bc56bba8f31bd99f350a5ebfd43d50f411b620c7
Author: Eric W. Biederman <[email protected]>
Date: Tue Feb 20 13:57:53 2007 -0800

[PATCH] shm: make sysv ipc shared memory use stacked files

------------[ cut here ]------------
Oops: Exception in kernel mode, sig: 5 [#1]
SMP NR_CPUS=32 NUMA
Modules linked in:
NIP: C00000000002EA80 LR: C0000000000A3F70 CTR: 0000000064000000
REGS: c00000077967b770 TRAP: 0700 Not tainted (2.6.20-g1df49008)
MSR: 8000000000029032 <EE,ME,IR,DR> CR: 28000448 XER: 00000000
TASK = c00000002f6737d0[3042] 'shm-fork' THREAD: c000000779678000 CPU: 1
GPR00: 0000000000000000 C00000077967B9F0 C0000000006725A0 C00000002F94EC00
GPR04: 0000000093FD1000 0000000093FD1000 0000000002000000 0000000093FD1000
GPR08: 0000000000000001 0000000000000000 0000000000000001 0000000000000001
GPR12: 0000000048000444 C00000000058BE00 00000000FFEE8094 0000000000000000
GPR16: 0000000002000000 00000000100AC5E8 00000000100A0000 0000000010080000
GPR20: 0000000000000000 0000000093FD1000 C00000077FDBD088 C00000002F94EC00
GPR24: C00000077FDBD088 0000000002000000 C00000002F94EC00 0000000093FD1000
GPR28: C00000077967BEA0 0000000093FD1000 C0000000005A2F58 C00000077FDBD088
NIP [C00000000002EA80] .huge_pte_alloc+0x7c/0x1dc
LR [C0000000000A3F70] .hugetlb_fault+0x48/0x150
Call Trace:
[C00000077967B9F0] [C00000077967BA80] 0xc00000077967ba80 (unreliable)
[C00000077967BAA0] [C0000000000A3F70] .hugetlb_fault+0x48/0x150
[C00000077967BB50] [C000000000094254] .__handle_mm_fault+0xa8/0x119c
[C00000077967BC50] [C00000000002A1E0] .do_page_fault+0x3a8/0x57c
[C00000077967BE30] [C000000000004AFC] handle_page_fault+0x20/0x58
Instruction dump:
78000020 7fa40040 409d0010 a00302be 7889c220 4800000c a00302bc 78892702
7c004e30 780907e1 40820008 39600001 <0b0b0000> e922adb8 3800ffff ebda0048
------------[ cut here ]------------
kernel BUG at /home/aglitke/git/linux-2.6/mm/hugetlb.c:375!
Oops: Exception in kernel mode, sig: 5 [#2]
SMP NR_CPUS=32 NUMA
Modules linked in:
NIP: C0000000000A3518 LR: C0000000000A376C CTR: C00000000006B348
REGS: c00000077967ace0 TRAP: 0700 Not tainted (2.6.20-g1df49008)
MSR: 8000000000029032 <EE,ME,IR,DR> CR: 42022442 XER: 00000000
TASK = c00000002f6737d0[3042] 'shm-fork' THREAD: c000000779678000 CPU: 1
GPR00: 0000000000000018 C00000077967AF60 C0000000006725A0 C00000077FDBD088
GPR04: 0000000093FD1000 00000000F7FD1000 C00000077FFA5A83 C00000077FFEF6E0
GPR08: 0000000010013000 0000000000FD1000 0000000010013000 C000000000697EB0
GPR12: 0000000022004444 C00000000058BE00 0000000010013000 0000000010013000
GPR16: 0000000010013000 FFFFFFFFFFFFFFFF 0000000000000000 C00000077967B120
GPR20: 00000000F7FD1000 0000000000000000 C0000000040DBDD0 C00000077FDBD088
GPR24: 000000EF9C340793 0000000010013000 C00000002F94EC00 C00000077967AFD0
GPR28: 00000000F7FD1000 0000000093FD1000 C0000000005A2F58 C00000002F94EC00
NIP [C0000000000A3518] .__unmap_hugepage_range+0x68/0x264
LR [C0000000000A376C] .unmap_hugepage_range+0x58/0xa0
Call Trace:
[C00000077967AF60] [0000000000000001] 0x1 (unreliable)
[C00000077967B020] [C0000000000A376C] .unmap_hugepage_range+0x58/0xa0
[C00000077967B0B0] [C000000000091464] .unmap_vmas+0x17c/0x954
[C00000077967B210] [C000000000099488] .exit_mmap+0xa4/0x17c
[C00000077967B2C0] [C00000000004CB08] .mmput+0x60/0x160
[C00000077967B360] [C000000000052E4C] .exit_mm+0x130/0x154
[C00000077967B400] [C0000000000535D8] .do_exit+0x238/0x964
[C00000077967B4C0] [C000000000022AC4] .die+0x150/0x154
[C00000077967B550] [C000000000022B10] ._exception+0x48/0x138
[C00000077967B660] [C000000000023634] .program_check_exception+0x5cc/0x5e4
[C00000077967B700] [C0000000000046F4] program_check_common+0xf4/0x100
--- Exception: 700 at .huge_pte_alloc+0x7c/0x1dc
LR = .hugetlb_fault+0x48/0x150
[C00000077967B9F0] [C00000077967BA80] 0xc00000077967ba80 (unreliable)
[C00000077967BAA0] [C0000000000A3F70] .hugetlb_fault+0x48/0x150
[C00000077967BB50] [C000000000094254] .__handle_mm_fault+0xa8/0x119c
[C00000077967BC50] [C00000000002A1E0] .do_page_fault+0x3a8/0x57c
[C00000077967BE30] [C000000000004AFC] handle_page_fault+0x20/0x58
Instruction dump:
fb610078 780957e3 ebe30000 7c000026 54001ffe 0b000000 e97e8030 39200001
800b0000 7d290036 3929ffff 7c894838 <0b090000> 800b0000 39200001 7d290036
Fixing recursive fault but reboot is needed!
BUG: soft lockup detected on CPU#0!
Call Trace:
[C000000779AD74C0] [C00000000000F588] .show_stack+0x68/0x1b4 (unreliable)
[C000000779AD7570] [C00000000007C5E0] .softlockup_tick+0xec/0x140
[C000000779AD7630] [C00000000005C68C] .run_local_timers+0x1c/0x30
[C000000779AD76B0] [C000000000021358] .timer_interrupt+0x80/0x498
[C000000779AD7790] [C000000000003580] decrementer_common+0x100/0x180
--- Exception: 901 at ._spin_lock+0x30/0x44
LR = .vma_link+0x6c/0x1d4
[C000000779AD7A80] [C0000000005C8C08] 0xc0000000005c8c08 (unreliable)
[C000000779AD7B30] [C000000000098D18] .do_mmap_pgoff+0x650/0x818
[C000000779AD7C40] [C00000000028F204] .do_shmat+0x304/0x454
[C000000779AD7D30] [C000000000289660] .compat_sys_shmat+0x34/0x94
[C000000779AD7DC0] [C000000000014A20] .compat_sys_ipc+0x18c/0x1e8
[C000000779AD7E30] [C00000000000872C] syscall_exit+0x0/0x40
BUG: soft lockup detected on CPU#2!
Call Trace:
[C000000779B934C0] [C00000000000F588] .show_stack+0x68/0x1b4 (unreliable)
[C000000779B93570] [C00000000007C5E0] .softlockup_tick+0xec/0x140
[C000000779B93630] [C00000000005C68C] .run_local_timers+0x1c/0x30
[C000000779B936B0] [C000000000021358] .timer_interrupt+0x80/0x498
[C000000779B93790] [C000000000003580] decrementer_common+0x100/0x180
--- Exception: 901 at ._spin_lock+0x2c/0x44
LR = .vma_link+0x6c/0x1d4
[C000000779B93A80] [C0000000005C8C08] 0xc0000000005c8c08 (unreliable)
[C000000779B93B30] [C000000000098D18] .do_mmap_pgoff+0x650/0x818
[C000000779B93C40] [C00000000028F204] .do_shmat+0x304/0x454
[C000000779B93D30] [C000000000289660] .compat_sys_shmat+0x34/0x94
[C000000779B93DC0] [C000000000014A20] .compat_sys_ipc+0x18c/0x1e8
[C000000779B93E30] [C00000000000872C] syscall_exit+0x0/0x40
BUG: soft lockup detected on CPU#3!
Call Trace:
[C000000779AD34C0] [C00000000000F588] .show_stack+0x68/0x1b4 (unreliable)
[C000000779AD3570] [C00000000007C5E0] .softlockup_tick+0xec/0x140
[C000000779AD3630] [C00000000005C68C] .run_local_timers+0x1c/0x30
[C000000779AD36B0] [C000000000021358] .timer_interrupt+0x80/0x498
[C000000779AD3790] [C000000000003580] decrementer_common+0x100/0x180
--- Exception: 901 at ._spin_lock+0x2c/0x44
LR = .vma_link+0x6c/0x1d4
[C000000779AD3A80] [C0000000005C8C08] 0xc0000000005c8c08 (unreliable)
[C000000779AD3B30] [C000000000098D18] .do_mmap_pgoff+0x650/0x818
[C000000779AD3C40] [C00000000028F204] .do_shmat+0x304/0x454
[C000000779AD3D30] [C000000000289660] .compat_sys_shmat+0x34/0x94
[C000000779AD3DC0] [C000000000014A20] .compat_sys_ipc+0x18c/0x1e8
[C000000779AD3E30] [C00000000000872C] syscall_exit+0x0/0x40
BUG: soft lockup detected on CPU#1!
Call Trace:
[C000000779BB34C0] [C00000000000F588] .show_stack+0x68/0x1b4 (unreliable)
[C000000779BB3570] [C00000000007C5E0] .softlockup_tick+0xec/0x140
[C000000779BB3630] [C00000000005C68C] .run_local_timers+0x1c/0x30
[C000000779BB36B0] [C000000000021358] .timer_interrupt+0x80/0x498
[C000000779BB3790] [C000000000003580] decrementer_common+0x100/0x180
--- Exception: 901 at ._spin_lock+0x2c/0x44
LR = .vma_link+0x6c/0x1d4
[C000000779BB3A80] [C0000000005C8C08] 0xc0000000005c8c08 (unreliable)
[C000000779BB3B30] [C000000000098D18] .do_mmap_pgoff+0x650/0x818
[C000000779BB3C40] [C00000000028F204] .do_shmat+0x304/0x454
[C000000779BB3D30] [C000000000289660] .compat_sys_shmat+0x34/0x94
[C000000779BB3DC0] [C000000000014A20] .compat_sys_ipc+0x18c/0x1e8
[C000000779BB3E30] [C00000000000872C] syscall_exit+0x0/0x40


--
Adam Litke - (agl at us.ibm.com)
IBM Linux Technology Center


2007-03-01 00:58:28

by Eric W. Biederman

[permalink] [raw]
Subject: Re: Kernel Oops with shm namespace cleanups

Adam Litke <[email protected]> writes:

> Hey. While testing 2.6.21-rc2 with libhugetlbfs, the shm-fork test case
> causes the kernel to oops. To reproduce: Execute 'make check' in the
> latest libhugetlbfs source on a 2.6.21-rc2 kernel with 100 huge pages
> allocated. Using fewer huge pages will likely also trigger the oops.
> Libhugetlbfs can be downloaded from:
> http://libhugetlbfs.ozlabs.org/snapshots/libhugetlbfs-dev-20070228.tar.gz
>
> I have collected the following information:

Thanks. I'm going to be offline starting early tomorrow so I'm
unfortunately not going to be timely in tracing this one down.

Ok. Looking at the code I have a clue what is going on. I think
I must have been out of it the day I wrote this patch. I don't have
fsync or get_unmapped_area methods appropriately wrapped. I clearly
did not do a close audit of the filesystem methods that hugetlbfs
inodes use. I may have just gotten lucky on other architectures.

get_unmapped_area looks like it will be a bit of a trick.

If it is just a matter of failing to wrap a couple of file methods
then the patch below should fix it or come close. That's the best
I can do before I leave.

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a60995a..44f1f05 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -168,7 +168,9 @@ void hugetlb_put_quota(struct address_space *mapping);

static inline int is_file_hugepages(struct file *file)
{
- return file->f_op == &hugetlbfs_file_operations;
+ return (file->f_op == &hugetlbfs_file_operations) ||
+ is_file_shm_hugepages(file);
+
}

static inline void set_file_hugepages(struct file *file)
diff --git a/include/linux/shm.h b/include/linux/shm.h
index a2c896a..ad2e3af 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -96,12 +96,17 @@ struct shmid_kernel /* private to the kernel */

#ifdef CONFIG_SYSVIPC
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr);
+extern int is_file_shm_hugepages(struct file *file);
#else
static inline long do_shmat(int shmid, char __user *shmaddr,
int shmflg, unsigned long *addr)
{
return -ENOSYS;
}
+static inline int is_file_shm_hugepages(struct file *file)
+{
+ return 0;
+}
#endif

#endif /* __KERNEL__ */
diff --git a/ipc/shm.c b/ipc/shm.c
index 26b935b..93cfa35 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -235,7 +235,7 @@ struct page *shm_nopage(struct vm_area_struct *vma, unsigned long address, int *
}

#ifdef CONFIG_NUMA
-int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
@@ -245,7 +245,7 @@ int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
return err;
}

-struct mempolicy *shm_get_policy(struct vm_area_struct *vma, unsigned long addr)
+static struct mempolicy *shm_get_policy(struct vm_area_struct *vma, unsigned long addr)
{
struct file *file = vma->vm_file;
struct shm_file_data *sfd = shm_file_data(file);
@@ -284,21 +284,41 @@ static int shm_release(struct inode *ino, struct file *file)
return 0;
}

-#ifndef CONFIG_MMU
+static int shm_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+ int (*fsync) (struct file *, struct dentry *, int datasync);
+ struct shm_file_data *sfd;
+ int ret;
+ ret = -EINVAL;
+ sfd = shm_file_data(file);
+ fsync = sfd->file->f_op->fsync;
+ if (fsync)
+ ret = fsync(sfd->file, sfd->file->f_path.dentry, datasync);
+ return ret;
+}
+
static unsigned long shm_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len, unsigned long pgoff,
unsigned long flags)
{
struct shm_file_data *sfd = shm_file_data(file);
- return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, pgoff,
- flags);
+ return get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+int is_file_shm_hugepages(struct file *file)
+{
+ int ret = 0;
+ if (file->f_op == &shm_file_operations) {
+ struct shm_file_data *sfd;
+ sfd = shm_file_data(file);
+ ret = is_file_hugepages(file);
+ }
+ return ret;
}
-#else
-#define shm_get_unmapped_area NULL
-#endif

static struct file_operations shm_file_operations = {
.mmap = shm_mmap,
+ .fsync = shm_fsync,
.release = shm_release,
.get_unmapped_area = shm_get_unmapped_area,
};