2005-11-15 21:30:13

by Pavel Machek

[permalink] [raw]
Subject: [RFC] userland swsusp

Hi!

This is prototype of userland swsusp. I'd like kernel parts to go in,
probably for 2.6.16. Now, I'm not sure about the interface, ioctls are
slightly ugly, OTOH it would be probably overkill to introduce
syscalls just for this. (I'll need to add an ioctl for freeing memory
in future).

Small question is where should userspace parts go. In-kernel
kernel/power/swsusp.c is basically replaced by usr/swsusp.c....

[Of course, I'll need to fix the patch up so that it does not modify
existing behaviour].

Signed-off-by: Pavel Machek <[email protected]>
Pavel

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -27,6 +27,7 @@
#include <linux/crash_dump.h>
#include <linux/backing-dev.h>
#include <linux/bootmem.h>
+#include <linux/suspend.h>

#include <asm/uaccess.h>
#include <asm/io.h>
@@ -559,6 +561,45 @@ static ssize_t write_port(struct file *
}
#endif

+static int
+ioctl_kmem(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int retval = 0;
+
+ switch (cmd) {
+ case IOCTL_FREEZE:
+ retval = sys_freeze();
+ break;
+ case IOCTL_UNFREEZE:
+ retval = sys_unfreeze();
+ break;
+ case IOCTL_ATOMIC_SNAPSHOT:
+ retval = sys_atomic_snapshot(arg);
+ break;
+ case IOCTL_ATOMIC_RESTORE:
+ {
+ int pages;
+ void *pgdir;
+ get_user(pages, (long *) arg);
+ get_user(pgdir, (void **) (arg + 4));
+ retval = sys_atomic_restore(pgdir, pages);
+ }
+ break;
+ case IOCTL_KMALLOC:
+ retval = get_zeroed_page(GFP_KERNEL);
+ break;
+ case IOCTL_KFREE:
+ free_page(arg);
+ break;
+ default:
+ retval = -ENOTTY;
+ break;
+ }
+
+ return retval;
+}
+
+
static ssize_t read_null(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
@@ -769,6 +810,7 @@ static struct file_operations mem_fops =
static struct file_operations kmem_fops = {
.llseek = memory_lseek,
.read = read_kmem,
+ .ioctl = ioctl_kmem,
.write = write_kmem,
.mmap = mmap_kmem,
.open = open_kmem,
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -1,6 +1,7 @@
#ifndef _LINUX_SWSUSP_H
#define _LINUX_SWSUSP_H

+#ifdef __KERNEL__
#if defined(CONFIG_X86) || defined(CONFIG_FRV) || defined(CONFIG_PPC32)
#include <asm/suspend.h>
#endif
@@ -9,6 +10,7 @@
#include <linux/config.h>
#include <linux/init.h>
#include <linux/pm.h>
+#endif

/* page backup entry */
typedef struct pbe {
@@ -30,6 +32,7 @@ typedef struct pbe {
#define for_each_pb_page(pbe, pblist) \
for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)

+#ifdef __KERNEL__

#define SWAP_FILENAME_MAXLENGTH 32

@@ -79,4 +82,24 @@ unsigned long get_safe_page(gfp_t gfp_ma
*/
#define PAGES_FOR_IO 512

+#endif
+
+struct restore_ioctl {
+ void *pgdir;
+ int nr_pages;
+};
+
+#define IOCTL_FREEZE _IO('3', 1)
+#define IOCTL_UNFREEZE _IO('3', 2)
+#define IOCTL_ATOMIC_SNAPSHOT _IOW('3', 3, void **)
+#define IOCTL_ATOMIC_RESTORE _IOR('3', 4, struct restore_ioctl)
+#define IOCTL_KMALLOC _IO('3', 5)
+#define IOCTL_KFREE _IOR('3', 6, void *)
+
+extern int sys_freeze(void);
+extern int sys_unfreeze(void);
+extern int sys_atomic_snapshot(void **pgdir);
+extern int sys_atomic_restore(void *pgdir, int pages);
+
+
#endif /* _LINUX_SWSUSP_H */
diff --git a/kernel/power/console.c b/kernel/power/console.c
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -9,6 +9,7 @@
#include <linux/console.h>
#include "power.h"

+#undef SUSPEND_CONSOLE
static int new_loglevel = 10;
static int orig_loglevel;
#ifdef SUSPEND_CONSOLE
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -143,12 +144,25 @@ thaw:

static void unprepare_processes(void)
{
- platform_finish();
thaw_processes();
enable_nonboot_cpus();
pm_restore_console();
}

+
+int sys_freeze(void)
+{
+ return prepare_processes();
+}
+
+int sys_unfreeze(void)
+{
+ thaw_processes();
+ enable_nonboot_cpus();
+ pm_restore_console();
+ return 0;
+}
+
/**
* pm_suspend_disk - The granpappy of power management.
*
@@ -243,6 +257,9 @@ static int software_resume(void)
if ((error = swsusp_check()))
goto Done;

+ /* Prepare processes only after swsusp_check; we could do it before,
+ but it would mean an ugly console switch even in case of normal boot.
+ */
pr_debug("PM: Preparing processes for restore.\n");

if ((error = prepare_processes())) {
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -498,6 +499,8 @@ static int write_pagedir(void)

printk( "Writing pagedir...");
for_each_pb_page (pbe, pagedir_nosave) {
+ /* FIXME: pagedir only has 768 entries. We may overflow it,
+ if we write around 768000 pages, thats ~4GB. */
if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++])))
return error;
}
@@ -537,7 +540,10 @@ static int write_suspend_image(void)

if (!enough_swap(nr_copy_pages)) {
printk(KERN_ERR "swsusp: Not enough free swap\n");
+#if 0
+ /* FIXME: should be done earlier */
return -ENOSPC;
+#endif
}

init_header();
@@ -1008,3 +1028,55 @@ void swsusp_close(void)

blkdev_put(resume_bdev);
}
+
+static int in_suspend __nosavedata = 0;
+
+int sys_atomic_snapshot(void **pgdir)
+{
+ int err;
+
+ err = device_suspend(PMSG_FREEZE);
+ if (err)
+ return err;
+
+ in_suspend = 1;
+ err = swsusp_suspend();
+
+ *pgdir = pagedir_nosave; /* FIXME: put_user */
+
+ {
+ struct pbe *p = pagedir_nosave;
+ int i = 0;
+ for_each_pbe (p, pagedir_nosave)
+ i++;
+ }
+
+ if (!err)
+ err = nr_copy_pages;
+ if (in_suspend == 2) {
+ err = -ENOANO;
+ }
+
+ device_resume();
+ return err;
+}
+
+int sys_atomic_restore(void *pgdir, int pages)
+{
+ int err;
+ /* FIXME: we'll probably overwrite pagedir with itself in inconsistent state...
+ ...no, pagedir is NOSAVE.
+ */
+
+ err = device_suspend(PMSG_FREEZE);
+ if (err)
+ return err;
+
+ in_suspend = 2;
+ pagedir_nosave = pgdir;
+ nr_copy_pages = pages;
+
+ err = swsusp_resume();
+ printk(KERN_CRIT "This should never return\n");
+ return err;
+}
diff --git a/usr/swsusp-init b/usr/swsusp-init
new file mode 100755
--- /dev/null
+++ b/usr/swsusp-init
@@ -0,0 +1,9 @@
+#!/bin/bash
+#
+# swapoff /dev/hda1; cat /dev/zero | head -c 4096 > /dev/hda1
+# /tmp/swsusp /dev/hda1 -s -b
+/tmp/swsusp /dev/hda1 -r
+exec /sbin/init
+exit
+
+
diff --git a/usr/swsusp.c b/usr/swsusp.c
new file mode 100755
--- /dev/null
+++ b/usr/swsusp.c
@@ -0,0 +1,529 @@
+#if 0
+#
+# Swsusp3 control program
+#
+# Copyright 2005 Pavel Machek <[email protected]>
+#
+# Distribute under GPLv2
+#
+gcc -g -Wall usr/swsusp.c -o /tmp/swsusp; cp -a usr/swsusp-init /tmp
+exit
+#
+#endif
+
+#define PAGE_SIZE 4096
+
+#include <unistd.h>
+#include <syscall.h>
+//#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <asm/fcntl.h>
+#include <string.h>
+
+extern __off64_t lseek64 (int __fd, __off64_t __offset, int __whence) __THROW;
+
+typedef long swp_entry_t;
+
+#include "/data/l/linux-sw3/include/linux/suspend.h"
+#include "/data/l/linux-sw3/include/linux/reboot.h"
+
+char forbidden_pages[(0xffffffff / PAGE_SIZE)+1];
+
+struct resume {
+ int nr_copy_pages;
+ void *pagedir;
+} resume;
+
+struct pbe_page {
+ unsigned long address; /* address of the copy */
+ unsigned long orig_address; /* original address of page */
+ swp_entry_t swap_address;
+
+ struct pbe *next; /* also used as scratch space at
+ * end of page (see link, diskpage)
+ */
+ char data[4096-16];
+};
+
+int kmem;
+
+void
+seek(unsigned long dest)
+{
+ if (lseek64(kmem, dest, SEEK_SET) != dest) {
+ fprintf(stderr, "Could not do intial seek to %lx: %m\n", dest);
+ fprintf(stderr, "lseek64(%d) returned: %lx\n", kmem, (long) lseek64(kmem, dest, SEEK_SET));
+ exit(1);
+ }
+}
+
+typedef int (*walker_t)(struct pbe *p, int i);
+typedef int (*walker2_t)(struct pbe_page *p, int i);
+
+int
+walk_chain(struct resume *r, walker_t w)
+{
+ struct pbe p;
+ int i = 0;
+ long pos;
+
+ seek(pos = (long) r->pagedir);
+ while (1) {
+ if (read(kmem, &p, sizeof(p)) != sizeof(p)) {
+ fprintf(stderr, "Could not read pbe #%d: %m\n", i);
+ exit(1);
+ }
+ if (w != NULL) {
+ w(&p, i);
+ seek(pos);
+ if (write(kmem, &p, sizeof(p)) != sizeof(p)) {
+ fprintf(stderr, "Could not write back pbe #%d: %m\n", i);
+ exit(1);
+ }
+ }
+ i++;
+ if (!p.next)
+ break;
+ seek(pos = (long) p.next);
+ }
+ return i;
+}
+
+void
+walk_pages_chain(struct resume *r, walker2_t w)
+{
+ struct pbe_page p;
+ int i = 0;
+ long pos;
+
+ seek(pos = (long) r->pagedir);
+ while (1) {
+ if (read(kmem, &p, sizeof(p)) != sizeof(p)) {
+ fprintf(stderr, "Could not read pbe #%d: %m\n", i);
+ exit(1);
+ }
+ if ((w != NULL) && !(pos & 0xfff)) {
+ w(&p, i);
+ seek(pos);
+ if (write(kmem, &p, sizeof(p)) != sizeof(p))
+ fprintf(stderr, "Could not write back pbe #%d: %m\n", i);
+ }
+ i++;
+ if (!p.next)
+ break;
+ seek(pos = (long) p.next);
+ }
+}
+
+
+int image_fd, image_pos = 4096;
+
+static int write_page(unsigned long addr, swp_entry_t * loc)
+{
+ swp_entry_t entry;
+
+ entry = image_pos;
+ image_pos += 4096;
+
+ {
+ char buf[4096];
+ seek(addr);
+ if (read(kmem, buf, 4096) != 4096) {
+ fprintf(stderr, "Could not read page #%lx: %m\n", addr);
+ exit(1);
+ }
+ *loc = image_pos;
+ if (lseek(image_fd, image_pos, SEEK_SET) != image_pos) {
+ fprintf(stderr, "Could not seek in image to #%d: %m\n", image_pos);
+ exit(1);
+ }
+ if (write(image_fd, buf, 4096) != 4096) {
+ fprintf(stderr, "Could not write to image at #%d: %m\n", image_pos);
+ exit(1);
+ }
+ }
+ return 0;
+}
+
+unsigned int mod;
+
+static int data_write_one(struct pbe *p, int i)
+{
+ int error;
+ if (!(i%mod))
+ printf( "\b\b\b\b%3d%%", i / mod );
+ if ((error = write_page(p->address, &(p->swap_address))))
+ return error;
+ return 0;
+}
+
+
+struct swsusp_info {
+ int nr_copy_pages;
+ int version_code;
+ char signature[10];
+ swp_entry_t pagedir[768];
+} __attribute__((aligned(4096)));
+
+struct swsusp_info swsusp_info, zeros;
+
+/**
+ * data_write - Write saved image to swap.
+ */
+static int data_write(void)
+{
+ int error = 0;
+ mod = resume.nr_copy_pages / 100;
+
+ if (!mod)
+ mod = 1;
+
+ printf( "Writing data to swap (%d pages)... ", resume.nr_copy_pages );
+ walk_chain(&resume, data_write_one);
+ printf("\b\b\b\bdone\n");
+ return error;
+}
+
+unsigned n = 0;
+
+static int pgdir_write_one(struct pbe_page *pbe, int i)
+{
+ int error;
+ if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++])))
+ return error;
+ return 0;
+}
+
+/**
+ * write_pagedir - Write the array of nr_copy_pages holding the page directory.
+ * @last: Last swap entry we write (needed for header).
+ */
+
+static int write_pagedir(void)
+{
+ int error = 0;
+
+ printf( "Writing pagedir...");
+ walk_pages_chain(&resume, pgdir_write_one);
+
+ swsusp_info.nr_copy_pages = n;
+ printf("done (%u pages)\n", n);
+ return error;
+}
+
+
+/**
+ * write_suspend_image - Write entire image and metadata.
+ *
+ */
+static int write_suspend_image(void)
+{
+ int error;
+
+ if ((error = data_write()))
+ goto Done;
+
+ if ((error = write_pagedir()))
+ goto Done;
+
+ swsusp_info.nr_copy_pages = resume.nr_copy_pages;
+ swsusp_info.version_code = 1;
+ strcpy(swsusp_info.signature, "swsusp3");
+ lseek(image_fd, 0, SEEK_SET);
+ write(image_fd, &swsusp_info, 4096);
+ Done:
+ return error;
+}
+
+char *image;
+
+int
+do_suspend(void)
+{
+ kmem = open("/dev/kmem", O_RDWR | O_LARGEFILE);
+ image_fd = open(image, O_RDWR | O_CREAT, 0600);
+ resume.nr_copy_pages = -1;
+ resume.pagedir = NULL;
+
+ if (kmem < 0) {
+ fprintf(stderr, "Could not open /dev/kmem: %m\n");
+ exit(1);
+ }
+
+ if (ioctl(kmem, IOCTL_FREEZE, 0)) {
+ fprintf(stderr, "Could not freeze system: %m\n");
+ exit(1); /* We do not want to reboot in case of failure */
+ }
+
+ resume.nr_copy_pages = ioctl(kmem, IOCTL_ATOMIC_SNAPSHOT, &resume.pagedir);
+ if (resume.nr_copy_pages < 0) {
+ fprintf(stderr, "Could not snapshot system: %m\n");
+
+ if (ioctl(kmem, IOCTL_UNFREEZE, 0)) {
+ fprintf(stderr, "Could not unfreeze system: %m\n");
+ return 1;
+ }
+ exit(1); /* Stop infinite loop of reboots */
+ }
+
+ walk_chain(&resume, NULL);
+ /* Ouch, at this point we'll appear in ATOMIC_SNAPSHOT syscall, with no way to tell... */
+
+ printf("Snapshotted, have %d pages, pagedir at %lx\n", resume.nr_copy_pages, (long) resume.pagedir);
+ walk_chain(&resume, NULL);
+ write_suspend_image();
+ fsync(image_fd);
+
+ return 0;
+
+}
+
+
+
+/**
+ * fill_pb_page - Create a list of PBEs on a given memory page
+ */
+
+static inline void fill_pb_page(struct pbe *pbpage)
+{
+ struct pbe *p;
+
+ p = pbpage;
+ pbpage += PB_PAGE_SKIP;
+ do
+ p->next = p + 1;
+ while (++p < pbpage);
+}
+
+unsigned long get_page(void)
+{
+ unsigned long ret;
+
+ do {
+ ret = ioctl(kmem, IOCTL_KMALLOC, 1);
+ } while(forbidden_pages[ret/PAGE_SIZE]);
+
+ return ret;
+}
+
+/**
+ * alloc_pagedir - Allocate the page directory.
+ *
+ * First, determine exactly how many pages we need and
+ * allocate them.
+ *
+ * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
+ * struct pbe elements (pbes) and the last element in the page points
+ * to the next page.
+ *
+ * On each page we set up a list of struct_pbe elements.
+ */
+
+static struct pbe * alloc_pagedir(unsigned nr_pages)
+{
+ unsigned num;
+ struct pbe *pblist;
+ struct pbe buf[PBES_PER_PAGE];
+ int i;
+
+ printf("alloc_pagedir(): nr_pages = %d\n", nr_pages);
+ resume.pagedir = pblist = (struct pbe *) get_page();
+ for (num = PBES_PER_PAGE; num < nr_pages;
+ nr_pages -= PBES_PER_PAGE) {
+
+ for (i=0; i<PBES_PER_PAGE-1; i++)
+ buf[i].next = &pblist[i+1];
+ buf[PBES_PER_PAGE-1].next = (struct pbe *) get_page();
+
+ seek((long) pblist);
+ write(kmem, buf, PAGE_SIZE);
+ pblist = buf[PBES_PER_PAGE-1].next;
+ }
+
+ for (i=0; i<nr_pages-1; i++)
+ buf[i].next = &pblist[i+1];
+ buf[nr_pages-1].next = 0;
+
+ seek((long) pblist);
+ write(kmem, buf, PAGE_SIZE);
+ return NULL;
+}
+
+
+
+
+/**
+ * read_pagedir - Read page backup list pages from swap
+ */
+
+static int read_pagedir_one(struct pbe *pbpage, int pos)
+{
+ struct pbe buf[PBES_PER_PAGE];
+ int error;
+ int i;
+ unsigned long offset = swsusp_info.pagedir[pos/PBES_PER_PAGE];
+
+ error = -1;
+ if (!offset)
+ printf("Something went very wrong at pagedir #%d\n", pos);
+
+ lseek(image_fd, offset, SEEK_SET);
+ error = (read(image_fd, (void *)buf, PAGE_SIZE) != PAGE_SIZE);
+
+ for (i=0; i<PBES_PER_PAGE; i++) {
+ pbpage[i].orig_address = buf[i].orig_address;
+ forbidden_pages[pbpage[i].orig_address / PAGE_SIZE] = 1;
+ pbpage[i].swap_address = buf[i].swap_address;
+ pbpage[i].address = buf[i].address;
+ }
+
+ return error;
+}
+
+
+
+/**
+ * data_read - Read image pages from swap.
+ */
+static int data_read_one(struct pbe *p, int i)
+{
+ int error = 0;
+ char buf[PAGE_SIZE];
+
+ if (!(i % mod))
+ printf("\b\b\b\b%3d%%", i / mod);
+
+ lseek(image_fd, p->swap_address, SEEK_SET);
+
+ p->address = get_page();
+ error = (read(image_fd, buf, PAGE_SIZE) != PAGE_SIZE);
+ seek(p->address);
+ error = (write(kmem, buf, PAGE_SIZE) != PAGE_SIZE);
+
+ return error;
+}
+
+int
+do_resume(void)
+{
+ kmem = open("/dev/kmem", O_RDWR | O_LARGEFILE);
+ image_fd = open(image, O_RDWR);
+
+ if (kmem < 0) {
+ fprintf(stderr, "Could not open /dev/kmem: %m\n");
+ return 1;
+ }
+
+ memset(&swsusp_info, 0, sizeof(swsusp_info));
+ read(image_fd, &swsusp_info, sizeof(swsusp_info));
+ resume.nr_copy_pages = swsusp_info.nr_copy_pages;
+
+ if (strcmp("swsusp3", swsusp_info.signature))
+ exit(0);
+ if (lseek(image_fd, 0, SEEK_SET) != 0) {
+ printf("Could not seek to kill sig: %m\n");
+ exit(1);
+ }
+ if (write(image_fd, &zeros, sizeof(swsusp_info)) != sizeof(swsusp_info)) {
+ printf("Could not write to kill sig: %m\n");
+ exit(1);
+ }
+ if (fsync(image_fd)) {
+ printf("Could not fsync to kill sig: %m\n");
+ exit(1);
+ }
+ printf("Got image, %d pages, signature [%s]\n", resume.nr_copy_pages, swsusp_info.signature);
+
+ alloc_pagedir(resume.nr_copy_pages);
+ printf("Verifying allocated pagedir: %d pages\n", walk_chain(&resume, NULL));
+ printf("swsusp: Reading pagedir ");
+ walk_pages_chain(&resume, (void *) read_pagedir_one);
+ printf("ok\n");
+
+ /* Need to be done twice; so that forbidden_pages comes into effect */
+ alloc_pagedir(resume.nr_copy_pages);
+ printf("Verifying allocated pagedir: %d pages\n", walk_chain(&resume, NULL));
+ printf("swsusp: Reading pagedir ");
+ walk_pages_chain(&resume, (void *) read_pagedir_one);
+ printf("ok\n");
+
+ printf("Verifying allocated pagedir: %d pages\n", walk_chain(&resume, NULL));
+
+ /* FIXME: Need to relocate pages */
+ mod = swsusp_info.nr_copy_pages / 100;
+ if (!mod)
+ mod = 1;
+ printf("swsusp: Reading image data (%d pages): ",
+ swsusp_info.nr_copy_pages);
+ walk_chain(&resume, data_read_one);
+ printf("\b\b\b\bdone\n");
+
+ if (ioctl(kmem, IOCTL_FREEZE, 0)) {
+ fprintf(stderr, "Could not freeze system: %m\n");
+ return 1;
+ }
+
+ if (ioctl(kmem, IOCTL_ATOMIC_RESTORE, &resume)) {
+ fprintf(stderr, "Could not restore system: %m\n");
+ }
+ /* Ouch, at this point we'll appear in ATOMIC_SNAPSHOT syscall, if
+ things went ok... */
+
+ return 0;
+}
+
+/*
+#define LINUX_REBOOT_CMD_RESTART 0x01234567
+#define LINUX_REBOOT_CMD_HALT 0xCDEF0123
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC
+#define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
+#define LINUX_REBOOT_CMD_SW_SUSPEND 0xD000FCE2
+*/
+
+int reboot(unsigned long todo)
+{
+ syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, todo, 0);
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int error;
+
+ sync();
+ setvbuf(stdout, NULL, _IONBF, 0);
+ setvbuf(stderr, NULL, _IONBF, 0);
+
+ if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
+ fprintf(stderr, "Could not lock myself: %m\n");
+ return 1;
+ }
+
+ image = argv[1];
+
+ while (argv[2]) {
+
+ if (!strcmp(argv[2], "-s"))
+ error = do_suspend();
+
+ if (!strcmp(argv[2], "-b"))
+ reboot(LINUX_REBOOT_CMD_RESTART);
+
+ if (!strcmp(argv[2], "-h"))
+ reboot(LINUX_REBOOT_CMD_HALT);
+
+ if (!strcmp(argv[2], "-o"))
+ reboot(LINUX_REBOOT_CMD_POWER_OFF);
+
+ if (!strcmp(argv[2], "-r"))
+ error = do_resume();
+
+ argv++;
+ }
+ return error;
+}
+

--
Thanks, Sharp!


2005-11-15 21:47:50

by Greg KH

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Tue, Nov 15, 2005 at 10:29:42PM +0100, Pavel Machek wrote:
> Hi!
>
> This is prototype of userland swsusp. I'd like kernel parts to go in,
> probably for 2.6.16. Now, I'm not sure about the interface, ioctls are
> slightly ugly, OTOH it would be probably overkill to introduce
> syscalls just for this. (I'll need to add an ioctl for freeing memory
> in future).

What's wrong with 4 new syscalls? It seems the cleanest way.

thanks,

greg k-h

2005-11-15 22:03:15

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > This is prototype of userland swsusp. I'd like kernel parts to go in,
> > probably for 2.6.16. Now, I'm not sure about the interface, ioctls are
> > slightly ugly, OTOH it would be probably overkill to introduce
> > syscalls just for this. (I'll need to add an ioctl for freeing memory
> > in future).
>
> What's wrong with 4 new syscalls? It seems the cleanest way.

I'd need about 7 of them, and that is on at least 3 architectures
(i386, x86-64, ppc, not sure about ppc64/arm). And it does not fix the
interface -- userland parts will still need to read/write /dev/kmem
:-(.

Yep, I can do it...
Pavel
--
Thanks, Sharp!

2005-11-15 22:26:12

by Dave Jones

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Tue, Nov 15, 2005 at 10:29:42PM +0100, Pavel Machek wrote:
> Hi!
>
> This is prototype of userland swsusp. I'd like kernel parts to go in,
> probably for 2.6.16. Now, I'm not sure about the interface, ioctls are
> slightly ugly, OTOH it would be probably overkill to introduce
> syscalls just for this. (I'll need to add an ioctl for freeing memory
> in future).

Just for info: If this goes in, Red Hat/Fedora kernels will fork
swsusp development, as this method just will not work there.
(We have a restricted /dev/mem that prevents writes to arbitary
memory regions, as part of a patchset to prevent rootkits)

Even it were not for this, the whole idea seems misconcieved to me
anyway.

Dave

2005-11-15 23:32:19

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > This is prototype of userland swsusp. I'd like kernel parts to go in,
> > probably for 2.6.16. Now, I'm not sure about the interface, ioctls are
> > slightly ugly, OTOH it would be probably overkill to introduce
> > syscalls just for this. (I'll need to add an ioctl for freeing memory
> > in future).
>
> Just for info: If this goes in, Red Hat/Fedora kernels will fork
> swsusp development, as this method just will not work there.
> (We have a restricted /dev/mem that prevents writes to arbitary
> memory regions, as part of a patchset to prevent rootkits)

If this goes in, you can still keep using old method... I'll not
remove it anytime soon.

> Even it were not for this, the whole idea seems misconcieved to me
> anyway.

...but how do you provide nice, graphical progress bar for swsusp
without this? People want that, and "esc to abort", compression,
encryption. Too much to be done in kernel space, IMNSHO.
Pavel
--
Thanks, Sharp!

2005-11-15 23:40:29

by Dave Jones

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Wed, Nov 16, 2005 at 12:32:01AM +0100, Pavel Machek wrote:

> If this goes in, you can still keep using old method... I'll not
> remove it anytime soon.

Ok.

> > Even it were not for this, the whole idea seems misconcieved to me
> > anyway.
>
> ...but how do you provide nice, graphical progress bar for swsusp
> without this? People want that, and "esc to abort", compression,
> encryption. Too much to be done in kernel space, IMNSHO.

I'll take "rootkit doesnt work" over "bells and whistles".

I think most users actually care more about "works" than
"looks pretty, and then fails spectacularly".

Dave

2005-11-16 04:35:51

by Dumitru Ciobarcianu

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

În data de Mi, 16-11-2005 la 00:32 +0100, Pavel Machek a scris:
> ...but how do you provide nice, graphical progress bar for swsusp
> without this? People want that, and "esc to abort", compression,
> encryption. Too much to be done in kernel space, IMNSHO.

Pavel, you really should _listen_ when someone else is talking about the
same things in different implementations. suspend2 has this feature
(nice graphical progress bars in userspace) for a long time now and it's
compatible with the fedora kernels.

Why don't you and Nigel (of suspend2) can just work together on this ?
It's a shame that much work is wasted in duplicated effort.

--
Cioby


2005-11-16 06:30:31

by Greg KH

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Wed, Nov 16, 2005 at 06:35:30AM +0200, Dumitru Ciobarcianu wrote:
> ??n data de Mi, 16-11-2005 la 00:32 +0100, Pavel Machek a scris:
> > ...but how do you provide nice, graphical progress bar for swsusp
> > without this? People want that, and "esc to abort", compression,
> > encryption. Too much to be done in kernel space, IMNSHO.
>
> Pavel, you really should _listen_ when someone else is talking about the
> same things in different implementations. suspend2 has this feature
> (nice graphical progress bars in userspace) for a long time now and it's
> compatible with the fedora kernels.

It's also implemented in the kernel, which is exactly the wrong place
for this. Pavel is doing this properly, why do you doubt him?

> Why don't you and Nigel (of suspend2) can just work together on this ?
> It's a shame that much work is wasted in duplicated effort.

It's not duplicated, Nigel knows what need to be done to work together,
if he so desires.

thanks,

greg k-h

2005-11-16 07:13:28

by Nigel Cunningham

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi.

On Wed, 2005-11-16 at 17:14, Greg KH wrote:
> On Wed, Nov 16, 2005 at 06:35:30AM +0200, Dumitru Ciobarcianu wrote:
> > ??n data de Mi, 16-11-2005 la 00:32 +0100, Pavel Machek a scris:
> > > ...but how do you provide nice, graphical progress bar for swsusp
> > > without this? People want that, and "esc to abort", compression,
> > > encryption. Too much to be done in kernel space, IMNSHO.
> >
> > Pavel, you really should _listen_ when someone else is talking about the
> > same things in different implementations. suspend2 has this feature
> > (nice graphical progress bars in userspace) for a long time now and it's
> > compatible with the fedora kernels.
>
> It's also implemented in the kernel, which is exactly the wrong place
> for this. Pavel is doing this properly, why do you doubt him?

You yourself called it a hack not long ago. I'm not sure why you think
the userspace is the right place for suspending. It seems to me that the
very fact that it requires access to structures that are normally only
visible to the kernel is pretty telling. To be fair, it is true at the
same time that graphical interfaces don't belong in the kernel - but the
vast majority of it - calculating what to write and doing the writing
does. It's only by hamstringing himself and the user - limiting the
image to half of memory that Pavel (and dropping support for writing to
swap) that Pavel can make this work.

> > Why don't you and Nigel (of suspend2) can just work together on this ?
> > It's a shame that much work is wasted in duplicated effort.
>
> It's not duplicated, Nigel knows what need to be done to work together,
> if he so desires.

I know that Pavel and I have such different ideas about what should be
done that it's not worth the effort.

Regards,

Nigel

> thanks,
>
> greg k-h
>
> ______________________________________________________________________
> _______________________________________________
> linux-pm mailing list
> [email protected]
> https://lists.osdl.org/mailman/listinfo/linux-pm
--


2005-11-16 08:56:19

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > > Even it were not for this, the whole idea seems misconcieved to me
> > > anyway.
> >
> > ...but how do you provide nice, graphical progress bar for swsusp
> > without this? People want that, and "esc to abort", compression,
> > encryption. Too much to be done in kernel space, IMNSHO.
>
> I'll take "rootkit doesnt work" over "bells and whistles".

It moves bunch of code from kernelspace to userspace. You don't have
to add bells and whistles at the same time. That's normally called
good thing. If Fedora has special needs, fine.
Pavel
--
Thanks, Sharp!

2005-11-16 15:59:55

by Stefan Rompf

[permalink] [raw]
Subject: Re: [RFC] userland swsusp

Pavel Machek wrote:

> This is prototype of userland swsusp. I'd like kernel parts to go in,
> probably for 2.6.16. Now, I'm not sure about the interface, ioctls are
> slightly ugly, OTOH it would be probably overkill to introduce
> syscalls just for this. (I'll need to add an ioctl for freeing memory
> in future).

I'm curious on the restrictions the userspace part would have to accept.
Can /usr/swsusp.c write to a file? Currently, you allow it, but I doubt
whether it would be wise to write to a file after you've snapshotted
kernel's filesystem state. OTOH, I don't want to reserve a partition just
for the image. Can userspace allocate memory after ioctl(SYS_FREEZE)?

I have userspace supported encryption of the image in mind.

Stefan

2005-11-16 17:06:38

by Greg KH

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Wed, Nov 16, 2005 at 05:00:45PM +1100, Nigel Cunningham wrote:
> Hi.
>
> On Wed, 2005-11-16 at 17:14, Greg KH wrote:
> > On Wed, Nov 16, 2005 at 06:35:30AM +0200, Dumitru Ciobarcianu wrote:
> > > ??n data de Mi, 16-11-2005 la 00:32 +0100, Pavel Machek a scris:
> > > > ...but how do you provide nice, graphical progress bar for swsusp
> > > > without this? People want that, and "esc to abort", compression,
> > > > encryption. Too much to be done in kernel space, IMNSHO.
> > >
> > > Pavel, you really should _listen_ when someone else is talking about the
> > > same things in different implementations. suspend2 has this feature
> > > (nice graphical progress bars in userspace) for a long time now and it's
> > > compatible with the fedora kernels.
> >
> > It's also implemented in the kernel, which is exactly the wrong place
> > for this. Pavel is doing this properly, why do you doubt him?
>
> You yourself called it a hack not long ago.

I did, in the proud tradition of neat hacks. It's a very nice
accomplishment that this even works, and I'm impressed.

> I'm not sure why you think the userspace is the right place for
> suspending.

If he can come up with an implementation that works, and puts stuff like
the pretty spinning wheels and progress bars and encryption in
userspace, that's great. That stuff doesn't belong in the kerenel if we
can possibly help it.

> It seems to me that the very fact that it requires access to
> structures that are normally only visible to the kernel is pretty
> telling.

So it needs some work :)

> To be fair, it is true at the same time that graphical interfaces
> don't belong in the kernel - but the vast majority of it - calculating
> what to write and doing the writing does. It's only by hamstringing
> himself and the user - limiting the image to half of memory that Pavel
> (and dropping support for writing to swap) that Pavel can make this
> work.

Then propose a better way to do this, if you can see one.

> > > Why don't you and Nigel (of suspend2) can just work together on this ?
> > > It's a shame that much work is wasted in duplicated effort.
> >
> > It's not duplicated, Nigel knows what need to be done to work together,
> > if he so desires.
>
> I know that Pavel and I have such different ideas about what should be
> done that it's not worth the effort.

I'm sorry that you feel this way. I thought that after our meeting in
July that things were different.

thanks,

greg k-h

2005-11-16 19:20:01

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC] userland swsusp

Hi!

> > This is prototype of userland swsusp. I'd like kernel parts to go in,
> > probably for 2.6.16. Now, I'm not sure about the interface, ioctls are
> > slightly ugly, OTOH it would be probably overkill to introduce
> > syscalls just for this. (I'll need to add an ioctl for freeing memory
> > in future).
>
> I'm curious on the restrictions the userspace part would have to accept.
> Can /usr/swsusp.c write to a file? Currently, you allow it, but I doubt

No. Writing to file would trash the filesystem. But you can bmap the file,
then write to the block device.

> whether it would be wise to write to a file after you've snapshotted
> kernel's filesystem state. OTOH, I don't want to reserve a partition just
> for the image. Can userspace allocate memory after ioctl(SYS_FREEZE)?

Better avoid memory allocation.

> I have userspace supported encryption of the image in mind.

Yes, that should be feasible.
Pavel
--
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms

2005-11-16 21:10:08

by Nigel Cunningham

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi Greg.

On Thu, 2005-11-17 at 03:50, Greg KH wrote:
> On Wed, Nov 16, 2005 at 05:00:45PM +1100, Nigel Cunningham wrote:
> > Hi.
> >
> > On Wed, 2005-11-16 at 17:14, Greg KH wrote:
> > > On Wed, Nov 16, 2005 at 06:35:30AM +0200, Dumitru Ciobarcianu wrote:
> > > > ??n data de Mi, 16-11-2005 la 00:32 +0100, Pavel Machek a scris:
> > > > > ...but how do you provide nice, graphical progress bar for swsusp
> > > > > without this? People want that, and "esc to abort", compression,
> > > > > encryption. Too much to be done in kernel space, IMNSHO.
> > > >
> > > > Pavel, you really should _listen_ when someone else is talking about the
> > > > same things in different implementations. suspend2 has this feature
> > > > (nice graphical progress bars in userspace) for a long time now and it's
> > > > compatible with the fedora kernels.
> > >
> > > It's also implemented in the kernel, which is exactly the wrong place
> > > for this. Pavel is doing this properly, why do you doubt him?
> >
> > You yourself called it a hack not long ago.
>
> I did, in the proud tradition of neat hacks. It's a very nice
> accomplishment that this even works, and I'm impressed.
>
> > I'm not sure why you think the userspace is the right place for
> > suspending.
>
> If he can come up with an implementation that works, and puts stuff like
> the pretty spinning wheels and progress bars and encryption in
> userspace, that's great. That stuff doesn't belong in the kerenel if we
> can possibly help it.

I can agree with putting splash screens and userspace stuff in
userspace. Suspend2 has had that too, since March. But the guts of the
code is a different thing. Encryption - well, I think we're both using
cryptoapi now, so that's more easily done in the kernel.

> > It seems to me that the very fact that it requires access to
> > structures that are normally only visible to the kernel is pretty
> > telling.
>
> So it needs some work :)

rm :)

> > To be fair, it is true at the same time that graphical interfaces
> > don't belong in the kernel - but the vast majority of it - calculating
> > what to write and doing the writing does. It's only by hamstringing
> > himself and the user - limiting the image to half of memory that Pavel
> > (and dropping support for writing to swap) that Pavel can make this
> > work.
>
> Then propose a better way to do this, if you can see one.

We've done the user interface in userspace using netlink to
communication.

We've done storing a full image of memory by storing the page cache
separately to the rest of the image, so that it doesn't need to have an
atomic copy made. (Nothing that uses the page cache is running anyway).
Having done this, we can use the memory occupied by the page cache for
our atomic copy, and just reread the overwritten page cache pages if we
need to cancel the suspend. Suspend2 has done this since... beta18 I
think.

> > > > Why don't you and Nigel (of suspend2) can just work together on this ?
> > > > It's a shame that much work is wasted in duplicated effort.
> > >
> > > It's not duplicated, Nigel knows what need to be done to work together,
> > > if he so desires.
> >
> > I know that Pavel and I have such different ideas about what should be
> > done that it's not worth the effort.
>
> I'm sorry that you feel this way. I thought that after our meeting in
> July that things were different.

I'm sorry you came away with that impression. I want to work together,
but I'm not willing to settle for a minimalist implementation. Pavel, on
the other hand, wanted a minimalist implementation at first. He seems to
be changing his mind a bit now, but I'm not sure how far that will go.

Regards,

Nigel

> thanks,
>
> greg k-h
--


2005-11-16 21:35:34

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi, all!

> > > > It's also implemented in the kernel, which is exactly the wrong place
> > > > for this. Pavel is doing this properly, why do you doubt him?
> > >
> > > You yourself called it a hack not long ago.
> >
> > I did, in the proud tradition of neat hacks. It's a very nice
> > accomplishment that this even works, and I'm impressed.
> >
> > > I'm not sure why you think the userspace is the right place for
> > > suspending.
> >
> > If he can come up with an implementation that works, and puts stuff like
> > the pretty spinning wheels and progress bars and encryption in
> > userspace, that's great. That stuff doesn't belong in the kerenel if we
> > can possibly help it.
>
> I can agree with putting splash screens and userspace stuff in
> userspace. Suspend2 has had that too, since March. But the guts of
> the

Well, I'd say that having to resort to netlink is ... not quite
nice. You get all the complexity of having userspace running during
suspend, and get very little benefit.

> code is a different thing. Encryption - well, I think we're both using
> cryptoapi now, so that's more easily done in the kernel.

Its not only encryption. It is encryption, compression, support for
suspend over network, support for suspend into file. That's quite a
lot of stuff.

> > Then propose a better way to do this, if you can see one.
>
> We've done the user interface in userspace using netlink to
> communication.
>
> We've done storing a full image of memory by storing the page cache
> separately to the rest of the image, so that it doesn't need to have an
> atomic copy made. (Nothing that uses the page cache is running anyway).
> Having done this, we can use the memory occupied by the page cache for
> our atomic copy, and just reread the overwritten page cache pages if we
> need to cancel the suspend. Suspend2 has done this since... beta18 I
> think.

...at expense of complexity, and hooks all over the kernel. Yes, if
you modify kernel a bit, nothing will use the page cache.

Anyway, I believe we have solution for that one. See Rafael's recent
patches -- "only free as much memory as neccessary" should do the
trick, without excessive complexity.

> > > I know that Pavel and I have such different ideas about what should be
> > > done that it's not worth the effort.
> >
> > I'm sorry that you feel this way. I thought that after our meeting in
> > July that things were different.
>
> I'm sorry you came away with that impression. I want to work together,
> but I'm not willing to settle for a minimalist implementation. Pavel, on
> the other hand, wanted a minimalist implementation at first. He seems to
> be changing his mind a bit now, but I'm not sure how far that will go.

Well, I do not want the complexity of two page sets. I think Rafael's
patches will provide almost equivalent functionality. Other than that,
all your features should be doable. I'm not saying I'm going to write
those patches myself, but I'll certainly not reject them just because
they are too big.
Pavel
--
Thanks, Sharp!

2005-11-16 21:40:40

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi,

On Wednesday, 16 of November 2005 00:40, Dave Jones wrote:
> On Wed, Nov 16, 2005 at 12:32:01AM +0100, Pavel Machek wrote:
>
> > If this goes in, you can still keep using old method... I'll not
> > remove it anytime soon.
>
> Ok.
>
> > > Even it were not for this, the whole idea seems misconcieved to me
> > > anyway.
> >
> > ...but how do you provide nice, graphical progress bar for swsusp
> > without this? People want that, and "esc to abort", compression,
> > encryption. Too much to be done in kernel space, IMNSHO.
>
> I'll take "rootkit doesnt work" over "bells and whistles".
>
> I think most users actually care more about "works" than
> "looks pretty, and then fails spectacularly".

I've been discussing this with Pavel for quite some time and my opinion is
that moving the image-writing and reading functionality of swsusp
to the user space makes sense from the technical point of view.

For example it would allow us to add the image encryption (real, eg.
with a passphrase-protected key), image compression, and image
verification in a rather straightforward way. These are important
functionalities, at least for some users.

However, I think we should not try to read and/or set up kernel
data structures from the users space. Instead, we can create an interface
that will allow us to convey the image data and metadata from the
kernel to the user space and vice versa.

Greetings,
Rafael

2005-11-16 22:25:50

by Nigel Cunningham

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi

On Thu, 2005-11-17 at 08:35, Pavel Machek wrote:
> Hi, all!
>
> > > > > It's also implemented in the kernel, which is exactly the wrong place
> > > > > for this. Pavel is doing this properly, why do you doubt him?
> > > >
> > > > You yourself called it a hack not long ago.
> > >
> > > I did, in the proud tradition of neat hacks. It's a very nice
> > > accomplishment that this even works, and I'm impressed.
> > >
> > > > I'm not sure why you think the userspace is the right place for
> > > > suspending.
> > >
> > > If he can come up with an implementation that works, and puts stuff like
> > > the pretty spinning wheels and progress bars and encryption in
> > > userspace, that's great. That stuff doesn't belong in the kerenel if we
> > > can possibly help it.
> >
> > I can agree with putting splash screens and userspace stuff in
> > userspace. Suspend2 has had that too, since March. But the guts of
> > the
>
> Well, I'd say that having to resort to netlink is ... not quite
> nice. You get all the complexity of having userspace running during
> suspend, and get very little benefit.

Mmm, but less complexity than with trying to do the whole suspend from
userspace. (I don't need to export pageflags, bio routines etc or work
around it by using /dev/kmem).

> > code is a different thing. Encryption - well, I think we're both using
> > cryptoapi now, so that's more easily done in the kernel.
>
> Its not only encryption. It is encryption, compression, support for
> suspend over network, support for suspend into file. That's quite a
> lot of stuff.
>
> > > Then propose a better way to do this, if you can see one.
> >
> > We've done the user interface in userspace using netlink to
> > communication.
> >
> > We've done storing a full image of memory by storing the page cache
> > separately to the rest of the image, so that it doesn't need to have an
> > atomic copy made. (Nothing that uses the page cache is running anyway).
> > Having done this, we can use the memory occupied by the page cache for
> > our atomic copy, and just reread the overwritten page cache pages if we
> > need to cancel the suspend. Suspend2 has done this since... beta18 I
> > think.
>
> ...at expense of complexity, and hooks all over the kernel. Yes, if
> you modify kernel a bit, nothing will use the page cache.

Could you back your "hooks all over the kernel" statement up? I do have
some BUG_ON()s aimed at double checking that nothing bad happens, but
they never get hit and obviously aren't required to stop processes using
the page cache. All that's really required is to freeze processes.

> Anyway, I believe we have solution for that one. See Rafael's recent
> patches -- "only free as much memory as neccessary" should do the
> trick, without excessive complexity.

That's still imposing a 1/2 of memory limit, though.

> > > > I know that Pavel and I have such different ideas about what should be
> > > > done that it's not worth the effort.
> > >
> > > I'm sorry that you feel this way. I thought that after our meeting in
> > > July that things were different.
> >
> > I'm sorry you came away with that impression. I want to work together,
> > but I'm not willing to settle for a minimalist implementation. Pavel, on
> > the other hand, wanted a minimalist implementation at first. He seems to
> > be changing his mind a bit now, but I'm not sure how far that will go.
>
> Well, I do not want the complexity of two page sets. I think Rafael's
> patches will provide almost equivalent functionality. Other than that,
> all your features should be doable. I'm not saying I'm going to write
> those patches myself, but I'll certainly not reject them just because
> they are too big.

I'm sorry for making you think that having two pagesets is a complex
issues. I know that when I first did it, I put tight restrictions on
memory usage while the first pageset was written and used a separate
memory pool. Since then, I've realised a far simpler way of handling
this, and the code has been greatly simplified. In essence, all you need
to do is make your I/O code generic enough that it can be passed a list
of pages to write and put page cache pages in a separate list when
figuring out what pages need to be saved. Then you save those pages
before doing your atomic copy of the other pages, and reload them after
restoring the atomic copy at resume time.

Regards,

Nigel


2005-11-16 22:47:53

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > > I can agree with putting splash screens and userspace stuff in
> > > userspace. Suspend2 has had that too, since March. But the guts of
> > > the
> >
> > Well, I'd say that having to resort to netlink is ... not quite
> > nice. You get all the complexity of having userspace running during
> > suspend, and get very little benefit.
>
> Mmm, but less complexity than with trying to do the whole suspend from
> userspace. (I don't need to export pageflags, bio routines etc or work
> around it by using /dev/kmem).

Well, userland swsusp has pretty low impact on kernel code -- it adds
something like 150 lines:

drivers/char/mem.c | 42 +
include/linux/suspend.h | 23
kernel/power/console.c | 1
kernel/power/disk.c | 19
kernel/power/swsusp.c | 78 +
usr/swsusp-init | 9
8 files changed, 2696 insertions(+), 4 deletions(-)

i don't think you can do much better than that...

> > ...at expense of complexity, and hooks all over the kernel. Yes, if
> > you modify kernel a bit, nothing will use the page cache.
>
> Could you back your "hooks all over the kernel" statement up? I do have
> some BUG_ON()s aimed at double checking that nothing bad happens, but
> they never get hit and obviously aren't required to stop processes using
> the page cache. All that's really required is to freeze processes.

Are you willing to merge the code without BUG_ONs?

> > Anyway, I believe we have solution for that one. See Rafael's recent
> > patches -- "only free as much memory as neccessary" should do the
> > trick, without excessive complexity.
>
> That's still imposing a 1/2 of memory limit, though.

Yes, hopefully users will not notice.

> > Well, I do not want the complexity of two page sets. I think Rafael's
> > patches will provide almost equivalent functionality. Other than that,
> > all your features should be doable. I'm not saying I'm going to write
> > those patches myself, but I'll certainly not reject them just because
> > they are too big.
>
> I'm sorry for making you think that having two pagesets is a complex
> issues. I know that when I first did it, I put tight restrictions on
> memory usage while the first pageset was written and used a separate
> memory pool. Since then, I've realised a far simpler way of handling
> this, and the code has been greatly simplified. In essence, all you need
> to do is make your I/O code generic enough that it can be passed a list
> of pages to write and put page cache pages in a separate list when
> figuring out what pages need to be saved. Then you save those pages
> before doing your atomic copy of the other pages, and reload them after
> restoring the atomic copy at resume time.

Okay, it may have gotten better. Anyway, this is the only part that
really needs to be in-kernel. Saving 50% of memory is still going to
produce *way* more responsive system than "save as little as
possible", and I hope it will be good enough.
Pavel
--
Thanks, Sharp!

2005-11-16 23:06:04

by Nigel Cunningham

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi.

On Thu, 2005-11-17 at 09:47, Pavel Machek wrote:
> Hi!
>
> > > > I can agree with putting splash screens and userspace stuff in
> > > > userspace. Suspend2 has had that too, since March. But the guts of
> > > > the
> > >
> > > Well, I'd say that having to resort to netlink is ... not quite
> > > nice. You get all the complexity of having userspace running during
> > > suspend, and get very little benefit.
> >
> > Mmm, but less complexity than with trying to do the whole suspend from
> > userspace. (I don't need to export pageflags, bio routines etc or work
> > around it by using /dev/kmem).
>
> Well, userland swsusp has pretty low impact on kernel code -- it adds
> something like 150 lines:
>
> drivers/char/mem.c | 42 +
> include/linux/suspend.h | 23
> kernel/power/console.c | 1
> kernel/power/disk.c | 19
> kernel/power/swsusp.c | 78 +
> usr/swsusp-init | 9
> 8 files changed, 2696 insertions(+), 4 deletions(-)
>
> i don't think you can do much better than that...
>
> > > ...at expense of complexity, and hooks all over the kernel. Yes, if
> > > you modify kernel a bit, nothing will use the page cache.
> >
> > Could you back your "hooks all over the kernel" statement up? I do have
> > some BUG_ON()s aimed at double checking that nothing bad happens, but
> > they never get hit and obviously aren't required to stop processes using
> > the page cache. All that's really required is to freeze processes.
>
> Are you willing to merge the code without BUG_ONs?

Yes.

> > > Anyway, I believe we have solution for that one. See Rafael's recent
> > > patches -- "only free as much memory as neccessary" should do the
> > > trick, without excessive complexity.
> >
> > That's still imposing a 1/2 of memory limit, though.
>
> Yes, hopefully users will not notice.

Users with more memory probably won't care so much, depending on what
apps they want to run and how responsive they want the system to be
post-resume.

> > > Well, I do not want the complexity of two page sets. I think Rafael's
> > > patches will provide almost equivalent functionality. Other than that,
> > > all your features should be doable. I'm not saying I'm going to write
> > > those patches myself, but I'll certainly not reject them just because
> > > they are too big.
> >
> > I'm sorry for making you think that having two pagesets is a complex
> > issues. I know that when I first did it, I put tight restrictions on
> > memory usage while the first pageset was written and used a separate
> > memory pool. Since then, I've realised a far simpler way of handling
> > this, and the code has been greatly simplified. In essence, all you need
> > to do is make your I/O code generic enough that it can be passed a list
> > of pages to write and put page cache pages in a separate list when
> > figuring out what pages need to be saved. Then you save those pages
> > before doing your atomic copy of the other pages, and reload them after
> > restoring the atomic copy at resume time.
>
> Okay, it may have gotten better. Anyway, this is the only part that
> really needs to be in-kernel. Saving 50% of memory is still going to
> produce *way* more responsive system than "save as little as
> possible", and I hope it will be good enough.

I agree about the 'way more responsive system'. Good enough will depend
on the user and which way the wind is blowing at the time. I guess if
that's the only option they have, it's still better than rebooting.

Regards,

Nigel

2005-11-17 07:18:59

by Stefan Rompf

[permalink] [raw]
Subject: Re: [RFC] userland swsusp

Am Mittwoch 16 November 2005 20:07 schrieb Pavel Machek:

> No. Writing to file would trash the filesystem. But you can bmap the file,
> then write to the block device.

And for reading, I could used a device mapper enforced read only mount or
filesystem code from grub.

Hmm, how about a possibility to ask the kernel for a list of free pages on a
swap device? This way, userspace could write the image to swap as the kernel
currently does, avoiding possible trouble with filesystems.

> Better avoid memory allocation.

And all memory allocated and mapped in advance would be part of the image. But
this is totally acceptable for a "suspend helper".

Stefan

2005-11-17 10:02:31

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC] userland swsusp

Hi!

> > No. Writing to file would trash the filesystem. But you can bmap the file,
> > then write to the block device.
>
> And for reading, I could used a device mapper enforced read only mount or
> filesystem code from grub.

Or use a filesystem that honours read-only option, like ext2...

> Hmm, how about a possibility to ask the kernel for a list of free pages on a
> swap device? This way, userspace could write the image to swap as the kernel
> currently does, avoiding possible trouble with filesystems.

Yes, that is the plan.

> > Better avoid memory allocation.
>
> And all memory allocated and mapped in advance would be part of the image. But
> this is totally acceptable for a "suspend helper".

Yes.
Pavel
--
Thanks, Sharp!

2005-11-18 19:04:59

by Alan

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Maw, 2005-11-15 at 17:25 -0500, Dave Jones wrote:
> Just for info: If this goes in, Red Hat/Fedora kernels will fork
> swsusp development, as this method just will not work there.
> (We have a restricted /dev/mem that prevents writes to arbitary
> memory regions, as part of a patchset to prevent rootkits)

Perhaps it is trying to tell you that you should be using SELinux rules
not kernel hacks for this purpose ?

> Even it were not for this, the whole idea seems misconcieved to me
> anyway.

I'm sceptical too but several Win9x BIOS vendor suspend paths were
implemented in roughly this way. I don't however see how you can
co-ordinate the freeze with outstanding O_DIRECT DMA to user pages for
one item.


2005-11-18 21:19:13

by Dave Jones

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Fri, Nov 18, 2005 at 07:36:29PM +0000, Alan Cox wrote:
> On Maw, 2005-11-15 at 17:25 -0500, Dave Jones wrote:
> > Just for info: If this goes in, Red Hat/Fedora kernels will fork
> > swsusp development, as this method just will not work there.
> > (We have a restricted /dev/mem that prevents writes to arbitary
> > memory regions, as part of a patchset to prevent rootkits)
>
> Perhaps it is trying to tell you that you should be using SELinux rules
> not kernel hacks for this purpose ?

I don't think selinux can give you the granularity to say
"process can access this bit of the file only", at least not yet.

Even if that was capable however, it still doesn't solve the problem.
Pavel's implementation wants to write to arbitary address spaces, which is
what we're trying to prevent. The two are at odds with each other.

Dave

2005-11-18 21:23:30

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Fri, 2005-11-18 at 19:36 +0000, Alan Cox wrote:
> On Maw, 2005-11-15 at 17:25 -0500, Dave Jones wrote:
> > Just for info: If this goes in, Red Hat/Fedora kernels will fork
> > swsusp development, as this method just will not work there.
> > (We have a restricted /dev/mem that prevents writes to arbitary
> > memory regions, as part of a patchset to prevent rootkits)
>
> Perhaps it is trying to tell you that you should be using SELinux rules
> not kernel hacks for this purpose ?

actually no. SELinux can't work, we've looked at that bigtime. Basically
/dev/mem has 3 types in one, and to apply security you need different
roles for each in selinux. so the only option to apply selinux
*anything* is to first split /dev/mem up.

types:
1) accessing non-ram memory (eg PCI mmio space) by X and the likes
(ideally should use sysfs but hey, changing X for this will take
forever)
2) accessing bios memory in the lower 1Gb for various emulation like
purposes (including vbetool and X mode setting)
3) accessing things the kernel sees as RAM

they are very distinct security wise.


2005-11-18 21:35:50

by Alan

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Gwe, 2005-11-18 at 22:23 +0100, Arjan van de Ven wrote:
> 1) accessing non-ram memory (eg PCI mmio space) by X and the likes
> (ideally should use sysfs but hey, changing X for this will take
> forever)

Once sysfs supports the relevant capabilities fixing X actually doesn't
look too horrible, the PCI mapping routines are abstracted and done by
PCITAG (ie PCI device). You would need the ISA hole too in some cases.

> 2) accessing bios memory in the lower 1Gb for various emulation like
> purposes (including vbetool and X mode setting)
> 3) accessing things the kernel sees as RAM
>
> they are very distinct security wise.

Agreed.

2005-11-18 21:49:03

by Greg KH

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Fri, Nov 18, 2005 at 04:18:47PM -0500, Dave Jones wrote:
> Even if that was capable however, it still doesn't solve the problem.
> Pavel's implementation wants to write to arbitary address spaces, which is
> what we're trying to prevent. The two are at odds with each other.

I agree, he needs to find a different way to get that information into
and out of the kernel than that device node for it to be accepted into
mainline. But for now, it's a nice way to mock up the fuctionality
needed.

thanks,

greg k-h

2005-11-18 23:39:37

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > Even it were not for this, the whole idea seems misconcieved to me
> > anyway.
>
> I'm sceptical too but several Win9x BIOS vendor suspend paths were
> implemented in roughly this way. I don't however see how you can
> co-ordinate the freeze with outstanding O_DIRECT DMA to user pages for
> one item.

I do not see a problem. swsusp process stops all other processes, freezes
the drivers, then asks for system snapshot. It certainly does *not* ask for
O_DIRECT........

--
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms

2005-11-19 05:54:17

by Jesse Barnes

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Friday, November 18, 2005 2:07 pm, Alan Cox wrote:
> On Gwe, 2005-11-18 at 22:23 +0100, Arjan van de Ven wrote:
> > 1) accessing non-ram memory (eg PCI mmio space) by X and the likes
> > (ideally should use sysfs but hey, changing X for this will take
> > forever)
>
> Once sysfs supports the relevant capabilities fixing X actually
> doesn't look too horrible, the PCI mapping routines are abstracted
> and done by PCITAG (ie PCI device). You would need the ISA hole too
> in some cases.

It's actually partly done already (at least for ia64, but the code I put
together works on x86 too, iirc, and should work elsewhere). The ISA
stuff is exported on a per-bus basis in legacy_io and legacy_mem files.

If vbetool and friends want to get at the ROM, they can use the sysfs
rom file like everyone else. There are problems with this however, on
systems where the ROM is unpacked at 0xc0000 or something, especially
if the unpacked version is modified by the BIOS at startup time, not
sure how to address that reliably.

Jesse

2005-11-19 08:44:37

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Fri, 2005-11-18 at 22:07 +0000, Alan Cox wrote:
> On Gwe, 2005-11-18 at 22:23 +0100, Arjan van de Ven wrote:
> > 1) accessing non-ram memory (eg PCI mmio space) by X and the likes
> > (ideally should use sysfs but hey, changing X for this will take
> > forever)
>
> Once sysfs supports the relevant capabilities fixing X actually doesn't
> look too horrible

I think the kernel already supports this since at least july if not
earlier. If there's something missing... someone needs to speak up..

(yes vga arbitrage is missing but well that's not there today by any
means either so not a regression)

> , the PCI mapping routines are abstracted and done by
> PCITAG (ie PCI device). You would need the ISA hole too in some cases.

this may need /dev/mem a bit longer, but hopefully is rarer. Once the
pci side is fixed I bet this only is easy to do as well


2005-11-19 09:33:00

by Rob Landley

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Wednesday 16 November 2005 00:00, Nigel Cunningham wrote:
> > It's not duplicated, Nigel knows what need to be done to work together,
> > if he so desires.
>
> I know that Pavel and I have such different ideas about what should be
> done that it's not worth the effort.

So first it was Pavel and Patrick Mochel...

Then Pavel and Nigel...

Recently Dave Jones rumbled about a suspend fork...

You sure you software suspend guys haven't been hanging out with the IDE
maintainers?

Rob

2005-11-20 21:23:11

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > > Just for info: If this goes in, Red Hat/Fedora kernels will fork
> > > swsusp development, as this method just will not work there.
> > > (We have a restricted /dev/mem that prevents writes to arbitary
> > > memory regions, as part of a patchset to prevent rootkits)
> >
> > Perhaps it is trying to tell you that you should be using SELinux rules
> > not kernel hacks for this purpose ?
>
> I don't think selinux can give you the granularity to say
> "process can access this bit of the file only", at least not yet.
>
> Even if that was capable however, it still doesn't solve the problem.
> Pavel's implementation wants to write to arbitary address spaces, which is
> what we're trying to prevent. The two are at odds with each other.

I do not think thats a security problem. By definition, suspending code
can change arbitrary things in memory -- it could just write image with
changes it desires, then resume from it. Whether this code is in kernel
or not, it has to be trusted.
--
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms

2005-11-20 21:23:12

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > > It's not duplicated, Nigel knows what need to be done to work together,
> > > if he so desires.
> >
> > I know that Pavel and I have such different ideas about what should be
> > done that it's not worth the effort.
>
> So first it was Pavel and Patrick Mochel...
>
> Then Pavel and Nigel...
>
> Recently Dave Jones rumbled about a suspend fork...

Pavel and Patrick is solved, and there's no Pavel and Nigel... Its just Pavel
vs. way too much code. See my reply to Dave.

> You sure you software suspend guys haven't been hanging out with the IDE
> maintainers?

:-)

--
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms

2005-11-20 21:48:57

by Dave Jones

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On Sat, Nov 19, 2005 at 11:43:32PM +0000, Pavel Machek wrote:
> Hi!
>
> > > > Just for info: If this goes in, Red Hat/Fedora kernels will fork
> > > > swsusp development, as this method just will not work there.
> > > > (We have a restricted /dev/mem that prevents writes to arbitary
> > > > memory regions, as part of a patchset to prevent rootkits)
> > >
> > > Perhaps it is trying to tell you that you should be using SELinux rules
> > > not kernel hacks for this purpose ?
> >
> > I don't think selinux can give you the granularity to say
> > "process can access this bit of the file only", at least not yet.
> >
> > Even if that was capable however, it still doesn't solve the problem.
> > Pavel's implementation wants to write to arbitary address spaces, which is
> > what we're trying to prevent. The two are at odds with each other.
>
> I do not think thats a security problem. By definition, suspending code
> can change arbitrary things in memory -- it could just write image with
> changes it desires, then resume from it. Whether this code is in kernel
> or not, it has to be trusted.

Stop thinking about the suspend usage case for a minute.

With your proposed changes, an attacker can scribble over random
bits of /dev/mem without suspending in order to do whatever he wants.

With what we have in-kernel, and a restricted /dev/mem, achieving the
attack you mention is a lot less feasible, as the attacker has no access
to the memory being written out to the suspend partition, even as root.
Even if they did, people tend to notice boxes shutting down pretty quickly
making this a not-very-stealthy attack.

Dave

2005-11-20 22:09:36

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > > > > Just for info: If this goes in, Red Hat/Fedora kernels will fork
> > > > > swsusp development, as this method just will not work there.
> > > > > (We have a restricted /dev/mem that prevents writes to arbitary
> > > > > memory regions, as part of a patchset to prevent rootkits)
> > > >
> > > > Perhaps it is trying to tell you that you should be using SELinux rules
> > > > not kernel hacks for this purpose ?
> > >
> > > I don't think selinux can give you the granularity to say
> > > "process can access this bit of the file only", at least not yet.
> > >
> > > Even if that was capable however, it still doesn't solve the problem.
> > > Pavel's implementation wants to write to arbitary address spaces, which is
> > > what we're trying to prevent. The two are at odds with each other.
> >
> > I do not think thats a security problem. By definition, suspending code
> > can change arbitrary things in memory -- it could just write image with
> > changes it desires, then resume from it. Whether this code is in kernel
> > or not, it has to be trusted.
>
> Stop thinking about the suspend usage case for a minute.
>
> With your proposed changes, an attacker can scribble over random
> bits of /dev/mem without suspending in order to do whatever he
> wants.

Well, without my changes, an attacker can scribble over random bits of
memory, too; I was not the one that introduced /dev/mem :-).

> With what we have in-kernel, and a restricted /dev/mem, achieving the
> attack you mention is a lot less feasible, as the attacker has no access
> to the memory being written out to the suspend partition, even as root.
> Even if they did, people tend to notice boxes shutting down pretty quickly
> making this a not-very-stealthy attack.

Can I read somewhere about security model you are using? Would it be
enough to restrict /dev/[k]mem to those people that have right to
update kernel anyway? Or your approach is "noone, absolutely noone has
right to modify running kernel"? [Do you still use loadable modules?]

Pavel

--
Thanks, Sharp!

2005-11-21 08:31:38

by Rob Landley

[permalink] [raw]
Subject: Re: [RFC] userland swsusp

On Wednesday 16 November 2005 13:07, Pavel Machek wrote:
> > I'm curious on the restrictions the userspace part would have to accept.
> > Can /usr/swsusp.c write to a file? Currently, you allow it, but I doubt
>
> No. Writing to file would trash the filesystem. But you can bmap the file,
> then write to the block device.

Do/should all the filesystems get remounted read-only as a precaution? Or is
that overkill?

Rob

2005-11-21 11:46:57

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi,

On Sunday, 20 of November 2005 23:09, Pavel Machek wrote:
}-- snip --{
> > With what we have in-kernel, and a restricted /dev/mem, achieving the
> > attack you mention is a lot less feasible, as the attacker has no access
> > to the memory being written out to the suspend partition, even as root.
> > Even if they did, people tend to notice boxes shutting down pretty quickly
> > making this a not-very-stealthy attack.
>
> Can I read somewhere about security model you are using? Would it be
> enough to restrict /dev/[k]mem to those people that have right to
> update kernel anyway? Or your approach is "noone, absolutely noone has
> right to modify running kernel"? [Do you still use loadable modules?]

The problem is that, whatever the security model, if you have access to the
kernel memory (eg. via /dev/kmem), you can modify the security rules
themselves, so this should better be avoided.

Apart from this, IMO, if it's necessary to access the kernel memory directly
from a userland process, this means that the process' functionality really
belongs to the kernel. Consequently, the code in swsusp that needs
to access the kernel memory should stay in the kernel, and the rest
can go to the userland.

Greetings,
Rafael

2005-11-21 14:14:53

by Pavel Machek

[permalink] [raw]
Subject: Re: [RFC] userland swsusp

Hi!

> > > I'm curious on the restrictions the userspace part would have to accept.
> > > Can /usr/swsusp.c write to a file? Currently, you allow it, but I doubt
> >
> > No. Writing to file would trash the filesystem. But you can bmap the file,
> > then write to the block device.
>
> Do/should all the filesystems get remounted read-only as a precaution? Or is
> that overkill?

You may not do that. remounting the filesystem writes to it... and
that's no-no.
Pavel
--
Thanks, Sharp!

2005-11-21 14:20:09

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Hi!

> > > With what we have in-kernel, and a restricted /dev/mem, achieving the
> > > attack you mention is a lot less feasible, as the attacker has no access
> > > to the memory being written out to the suspend partition, even as root.
> > > Even if they did, people tend to notice boxes shutting down pretty quickly
> > > making this a not-very-stealthy attack.
> >
> > Can I read somewhere about security model you are using? Would it be
> > enough to restrict /dev/[k]mem to those people that have right to
> > update kernel anyway? Or your approach is "noone, absolutely noone has
> > right to modify running kernel"? [Do you still use loadable modules?]
>
> The problem is that, whatever the security model, if you have access to the
> kernel memory (eg. via /dev/kmem), you can modify the security rules
> themselves, so this should better be avoided.

Well, under current linux security model, root has all permissions,
including inserting modifying running kernel, touching hardware
directly, and installing rootkits. Fedora may be trying to change
that... but if so, I'd like to know what they are planning.
Pavel
--
Thanks, Sharp!

2005-11-23 10:16:40

by Lorenzo Colitti

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

Pavel Machek wrote:
> Yes, hopefully users will not notice.

? So the idea is to merge inferior code and "hope users won't notice"?

Users might not notice that half their memory is gone, but they *will*
notice that their system is dog slow when it resumes because all their
caches are gone and a most of their stuff is swapped out.

Non-responsive system on resume is one of the main reasons that swsusp2
is much better than swsusp1, and yes, users *do* notice (I was one of
them, as I pointed out a while back). Yes, 50% is better than nothing,
but it's still a pretty poor show.

Seen from the perspective of a user, the situation is simple: suspend2
works, it's fast, and it's rock-solid. Just use it.


Regards,
Lorenzo

P.S. Don't "show me the code" me. I can't write the code. :-) But based
on what I see of how well suspend2 works, I think Nigel can...

2005-11-23 12:03:04

by Pavel Machek

[permalink] [raw]
Subject: Re: [linux-pm] [RFC] userland swsusp

On St 23-11-05 11:16:27, Lorenzo Colitti wrote:
> Pavel Machek wrote:
> >Yes, hopefully users will not notice.
>
> ? So the idea is to merge inferior code and "hope users won't notice"?
>
> Users might not notice that half their memory is gone, but they *will*
> notice that their system is dog slow when it resumes because all their
> caches are gone and a most of their stuff is swapped out.
>
> Non-responsive system on resume is one of the main reasons that swsusp2
> is much better than swsusp1, and yes, users *do* notice (I was one of
> them, as I pointed out a while back). Yes, 50% is better than nothing,
> but it's still a pretty poor show.

Did you actually benchmark it?

> Seen from the perspective of a user, the situation is simple: suspend2
> works, it's fast, and it's rock-solid. Just use it.

About as helpful as "Windows XP works, it's fast, and it's
rock-solid".

Pavel
--
Thanks, Sharp!