Dear Linus:
In the production environment, it is hard to find the cause of an
Oops message if a bug causes the kernel to panic. Though the Oops
message is printed on the screen, we can't read the whole message in
most cases due to the size of the screen, and it is hard to save the
Oops message for debugging. I know kdump is a choice, but it needs
more memory, and in most cases, Oops messages are enough to find out
the bugs. The other choices are netconsole and serial line, but they
both need extra computers.
memconsole, as a virtual console, will save the messages into a reserved
block of boot memory. Because the memory won't be cleared (if the
self-check on memory is disabled) between two starts, the Oops
messages, like the other console messages, will be preserved across a
reboot, and you have a chance to see what happened in the last start.
In order to avoid the corruption of the memory used by memconsole, I
search for free memory from the end of low memory towards the start.
Here is the code, and the code is a mess currently. Am I heading in the
right direction? Are my assumptions right?
The following code is appended to the file mm/bootmem.c:
/*
 * Virtual address of the boot memory reserved for memconsole. It stays NULL
 * until memconsole_setup() stores the address of a reserved region.
 * Exported so that the memconsole module can locate the region.
 */
void *memconsole_mem_start = NULL;
EXPORT_SYMBOL_GPL(memconsole_mem_start);
/*
 * Size in bytes of the region above, as parsed and page-aligned by
 * memconsole_setup(); 0 when "memconsole=" was not given.
 */
unsigned int memconsole_mem_size = 0;
EXPORT_SYMBOL_GPL(memconsole_mem_size);
/*
 * memconsole_setup - handle the "memconsole=<bytes>" boot parameter.
 * @str: decimal size in bytes requested for the memconsole buffer.
 *
 * Rounds the size up to whole pages and walks low memory downwards from
 * max_low_pfn in steps of that size, reserving the first region that
 * reserve_bootmem_generic() accepts. On success the region is published
 * through memconsole_mem_start / memconsole_mem_size.
 *
 * A size that is zero, or that does not fit memconsole_mem_size, is rejected
 * with a message and leaves memconsole disabled. If no region can be
 * reserved the kernel panics, as before.
 *
 * Returns 1 to tell the boot-parameter parser that the option was consumed.
 */
static int __init memconsole_setup(char *str)
{
	unsigned long size;
	unsigned long pages;
	unsigned long start;
	int reserved = 0;

	size = simple_strtoul(str, NULL, 10);
	size = PAGE_ALIGN(size);
	/*
	 * A zero size (unparsable input, "0", or wrap-around in PAGE_ALIGN)
	 * would make the search step zero and loop forever; a size wider than
	 * unsigned int would be silently truncated when published.
	 */
	if (size == 0 || size != (unsigned int)size) {
		printk("invalid size for memconsole: %s\n", str);
		return 1;
	}
	pages = size >> PAGE_SHIFT;

	/*
	 * Check that a whole step still fits above min_low_pfn *before*
	 * subtracting, so the unsigned pfn can never wrap below zero.
	 */
	start = max_low_pfn;
	while (start >= pages && start - pages >= min_low_pfn) {
		start -= pages;
		if (reserve_bootmem_generic(start << PAGE_SHIFT, size,
					    BOOTMEM_EXCLUSIVE) >= 0) {
			reserved = 1;
			break;
		}
	}
	if (!reserved)
		panic("Can't reserve bootmem for memconsole\n");

	memconsole_mem_size = size;
	memconsole_mem_start = phys_to_virt(start << PAGE_SHIFT);
	printk("reserve bootmem for memconsole %u@%p\n",
	       memconsole_mem_size, memconsole_mem_start);

	return 1;
}
__setup("memconsole=", memconsole_setup);
A separate kernel module is used to implement the rest of the functionality:
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/reboot.h>
#include <linux/console.h>
#include <linux/proc_fs.h>
#include <linux/io.h>
/*
 * Module parameter "reboot": when non-zero (the default), panic_restart()
 * restarts the machine after a panic.
 */
static int reboot = 1;
module_param(reboot, bool, 0644);
/*
 * Panic notifier callback. Restarts the machine when the "reboot" module
 * parameter is set; otherwise does nothing. The notifier arguments are not
 * used. Always reports NOTIFY_DONE.
 */
static int panic_restart(struct notifier_block *block, unsigned long l, void *p)
{
	if (!reboot)
		return NOTIFY_DONE;

	emergency_restart();
	return NOTIFY_DONE;
}
/*
 * Notifier registered on panic_notifier_list by init(). It uses the lowest
 * possible priority, presumably so that the restart happens only after all
 * other panic notifiers have run -- NOTE(review): confirm ordering semantics.
 */
static struct notifier_block paniced = {
.notifier_call = panic_restart,
.priority = INT_MIN,
};
/* First byte of the ring buffer data area (directly after the header). */
static char *buf;
/* Points at the header's buf_len field: number of valid bytes stored. */
static unsigned int *buf_len;
/* Points at the header's buf_ptr field: position of the next write. */
static char **buf_ptr;
/* Capacity of the data area in bytes, set by buf_init(). */
static unsigned int buf_size;
/*
 * Header kept at the very start of the reserved memory, so the write
 * position and fill level live in the same region as the data itself.
 */
struct buf_header {
char *buf_ptr;
unsigned int buf_len;
};
/* Serializes ring buffer access between record_msg() and proc_read(). */
static DEFINE_SPINLOCK(buf_lock);
/*
 * buf_init - attach the ring buffer to the memory reserved at boot.
 *
 * The reserved region starts with a struct buf_header followed by the data
 * area. The data area is capped at PAGE_SIZE. If the header found in memory
 * is not self-consistent (write pointer outside the data area, length larger
 * than the data area, or a not-yet-wrapped buffer whose write offset differs
 * from its length), the header is reset to an empty buffer.
 *
 * Returns 0 on success, -ENOMEM if no memory was reserved, or -EINVAL if the
 * reserved region is too small to hold the header plus any data.
 */
static int buf_init(void)
{
	extern void *memconsole_mem_start;
	extern unsigned int memconsole_mem_size;
	struct buf_header *hdr;

	if (memconsole_mem_size == 0 || memconsole_mem_start == NULL) {
		printk(KERN_WARNING "no memory for memconsole\n");
		return -ENOMEM;
	}
	hdr = memconsole_mem_start;
	if (memconsole_mem_size <= sizeof(*hdr)) {
		printk(KERN_WARNING "no enough memory for memconsole\n");
		return -EINVAL;
	}
	buf = (char *)(hdr + 1);
	/*
	 * Clamp by hand instead of min(): the operands are size_t and
	 * unsigned long, which the type-checking min() rejects when the two
	 * types differ.
	 */
	buf_size = memconsole_mem_size - sizeof(*hdr);
	if (buf_size > PAGE_SIZE)
		buf_size = PAGE_SIZE;

	if (hdr->buf_ptr < buf || hdr->buf_ptr >= buf + buf_size ||
	    hdr->buf_len > buf_size ||
	    (hdr->buf_len < buf_size &&
	     (unsigned int)(hdr->buf_ptr - buf) != hdr->buf_len)) {
		printk(KERN_WARNING "memory for memconsole isn't initialized\n");
		hdr->buf_ptr = buf;
		hdr->buf_len = 0;
	}
	buf_ptr = &hdr->buf_ptr;
	buf_len = &hdr->buf_len;

	return 0;
}
/*
 * Console write hook: appends @len bytes of @msg to the ring buffer.
 *
 * Data is copied in chunks that never run past the end of the data area;
 * when the write position reaches the end it wraps back to the start. The
 * stored length grows with each chunk until it saturates at buf_size.
 * @con is not used. The whole operation runs under buf_lock with
 * interrupts disabled.
 */
static void record_msg(struct console *con, const char *msg, unsigned int len)
{
	unsigned long irq_flags;
	unsigned int room;
	unsigned int chunk;
	unsigned int filled;

	spin_lock_irqsave(&buf_lock, irq_flags);
	while (len != 0) {
		/* Bytes left before the end of the data area. */
		room = (unsigned int)(buf + buf_size - *buf_ptr);
		chunk = room < len ? room : len;

		memcpy(*buf_ptr, msg, chunk);
		msg += chunk;
		len -= chunk;

		*buf_ptr += chunk;
		if (*buf_ptr == buf + buf_size)
			*buf_ptr = buf;

		if (*buf_len < buf_size) {
			filled = *buf_len + chunk;
			*buf_len = filled < buf_size ? filled : buf_size;
		}
	}
	spin_unlock_irqrestore(&buf_lock, irq_flags);
}
/*
 * Console descriptor registered by init(). Every console message is passed
 * to record_msg(). NOTE(review): CON_PRINTBUFFER presumably asks for the
 * already-logged messages to be replayed on registration -- confirm.
 */
static struct console memconsole =
{
.name = "memconsole",
.flags = CON_ENABLED | CON_PRINTBUFFER,
.write = record_msg,
};
/*
 * /proc read handler: copies the whole ring buffer content into @page in
 * chronological order and returns the number of bytes copied.
 *
 * While the buffer has not filled up yet, the valid data is the first
 * *buf_len bytes of the data area. Once it is full, the oldest byte sits at
 * the current write position, so the tail (write position to end) is copied
 * first, followed by the head (start to write position).
 * @start, @off, @count, @eof and @data are not used.
 */
static int proc_read(char *page, char **start, off_t off, int count, int *eof,
		     void *data)
{
	unsigned long irq_flags;
	unsigned int copied;
	unsigned int tail;

	spin_lock_irqsave(&buf_lock, irq_flags);
	copied = *buf_len;
	if (copied >= buf_size) {
		tail = buf + buf_size - *buf_ptr;
		memcpy(page, *buf_ptr, tail);
		if (*buf_ptr != buf)
			memcpy(page + tail, buf, *buf_ptr - buf);
		copied = buf_size;
	} else {
		memcpy(page, buf, *buf_len);
	}
	spin_unlock_irqrestore(&buf_lock, irq_flags);

	return copied;
}
/*
 * Module entry point.
 *
 * Attaches to the reserved memory via buf_init(), creates the read-only
 * /proc/memconsole entry served by proc_read(), hooks panic_restart() into
 * the panic notifier chain and finally registers the console so messages
 * start flowing into the buffer.
 *
 * Returns 0 on success, the buf_init() error if the buffer is unavailable,
 * or -ENOMEM if the proc entry could not be created.
 */
static int __init init(void)
{
	int retval;

	retval = buf_init();
	if (retval != 0)
		return retval;

	if (create_proc_read_entry("memconsole", 0444, NULL, proc_read,
				   NULL) == NULL) {
		printk(KERN_ERR "Can't register proc entry\n");
		/* Failure to create the entry is an allocation failure, not a bad address. */
		return -ENOMEM;
	}
	atomic_notifier_chain_register(&panic_notifier_list, &paniced);
	register_console(&memconsole);

	return 0;
}
module_init(init);
/*
 * Module exit point: undoes init() in reverse order -- stop receiving
 * console messages, drop the panic notifier, then remove /proc/memconsole.
 */
static void __exit fini(void)
{
unregister_console(&memconsole);
atomic_notifier_chain_unregister(&panic_notifier_list, &paniced);
remove_proc_entry("memconsole", NULL);
}
module_exit(fini);
MODULE_AUTHOR("xiaosuo <[email protected]>");
MODULE_LICENSE("GPL");
Certainly, I tested it on my virtual machine, and it did work.
--
Regards,
Changli Gao([email protected])
On Sun, Jun 14, 2009 at 09:29:48AM +0800, Changli Gao wrote:
>
>the following code is appended to the file mm/bootmem.c
Why not a patch?
Your code is a kernel module, why do you append it into
a non-module source file?
On Mon, Jun 15, 2009 at 3:11 PM, Amerigo Wang<[email protected]> wrote:
> On Sun, Jun 14, 2009 at 09:29:48AM +0800, Changli Gao wrote:
>>
>>the following code is appended to the file mm/bootmem.c
>
>
> Why not a patch?
>
> Your code is a kernel module, why do you append it into
> a non-module source file?
>
bootmem can't be allocated in modules. :(
--
Regards,
Changli Gao([email protected])
2009/6/15 Changli Gao <[email protected]>:
> On Mon, Jun 15, 2009 at 3:11 PM, Amerigo Wang<[email protected]> wrote:
>> On Sun, Jun 14, 2009 at 09:29:48AM +0800, Changli Gao wrote:
>>>
>>>the following code is appended to the file mm/bootmem.c
>>
>>
>> Why not a patch?
>>
>> Your code is a kernel module, why do you append it into
>> a non-module source file?
>>
> bootmem can't be allocated in modules. :(
Ok, then explain your code below:
module_init(init);
...
module_exit(fini);
MODULE_AUTHOR("xiaosuo <[email protected]>");
MODULE_LICENSE("GPL");
On Sun 2009-06-14 09:29:48, Changli Gao wrote:
> Dear Linus:
>
> In the production environment, it is hard to find the causion of an
> Oops message if a bug causes the kernel panics. Though the Oops
> message is printed on the screen, we can't read the whole message in
> most cases due to the size of the screen, and it is hard to save the
> Oops message for debugging. I know kdump is a choice, but it needs
> more memory, and in most cases, Oops messages are enough to find out
> the bugs. The other choices are netconsole and serial line, but they
> both need extra computers.
>
> memconsole as a virtual console, will save the messages into a block
> of boot memory reserved. Because the memory won't be cleaned(if the
> self-check on memory is disabled) between two starts, the oops
> messages as the other console messages will be saved between them, and
> you have a chance to see what happened in the last start.
>
> In order to avoid the corruption of the memory used by memconsole, I
> find the memory from the end to the start.
>
> Here is the code, and the code is mess currently. Am I in the right
> direction? Are my assumptions right?
>
> the following code is appended to the file mm/bootmem.c
Generate proper patch...
> void *memconsole_mem_start = NULL;
> EXPORT_SYMBOL_GPL(memconsole_mem_start);
> unsigned int memconsole_mem_size = 0;
> EXPORT_SYMBOL_GPL(memconsole_mem_size);
>
> static int __init memconsole_setup(char *str)
> {
> unsigned long start;
>
> memconsole_mem_size = simple_strtoul(str, NULL, 10);
> memconsole_mem_size = PAGE_ALIGN(memconsole_mem_size);
> for (start = max_low_pfn - (memconsole_mem_size >> PAGE_SHIFT);
> start >= min_low_pfn;
> start -= (memconsole_mem_size >> PAGE_SHIFT)) {
> if (reserve_bootmem_generic(start << PAGE_SHIFT,
> memconsole_mem_size,
> BOOTMEM_EXCLUSIVE) >= 0)
> break;
> }
> if (start < min_low_pfn)
> panic("Can't reserve bootmem for memconsole\n");
> memconsole_mem_start = phys_to_virt(start << PAGE_SHIFT);
> printk("reserve bootmem for memconsole %u@%p\n",
> memconsole_mem_size, memconsole_mem_start);
>
> return 1;
> }
> __setup("memconsole=", memconsole_setup);
>
> An individual kernel module is used to implement the whole function.
> Certainly, I tested it on my virtual machine, and I did work.
...and test it on real hw...?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Sun, Jun 21, 2009 at 2:58 PM, Pavel Machek<[email protected]> wrote:
>
> ...and test it on real hw...?
> Pavel
>
Bad news: it doesn't work on a real computer, so the patch isn't necessary now.
I'm trying another way: building an initrd.gz which does nothing
except dump the dmesg to a non-volatile storage device, such
as a file or a partition of a hard disk, and it does work on a real
computer. When it is stable, I'll share the code with you all.
--
Regards,
Changli Gao([email protected])