2009-12-31 04:10:33

by Nitin Gupta

[permalink] [raw]
Subject: [PATCH 1/2] [mmotm v2] Add notifiers for various swap events

Events:
- Swapon
- Swapoff
- When a swap slot is freed

This is required for ramzswap module which implements RAM based block
devices to be used as swap disks. These devices require a notification
on these events to function properly.

Currently, I'm not sure if any of these event notifiers have any other
users. However, adding ramzswap specific hooks instead of this generic
approach resulted in bad/hacky code with too many 'ifdef CONFIG_RAMZSWAP'
spread all over the core kernel code.

For SWAP_EVENT_SLOT_FREE, callbacks are made under swap_lock. Currently, this
is not a problem since ramzswap is the only user and the callback it registers
can be safely made under this lock. However, if this event finds more users,
we might have to work on reducing contention on this lock (per-swap lock?).

v2: added comment before swap slot free notifier call to clarify that
ramzswap is currently the only user of this notifier.

Signed-off-by: Nitin Gupta <[email protected]>
---
include/linux/swap.h | 13 ++++++++-
mm/swapfile.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a2602a8..43e6a96 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -12,8 +12,6 @@
#include <asm/atomic.h>
#include <asm/page.h>

-struct notifier_block;
-
struct bio;

#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
@@ -150,6 +148,12 @@ enum {
SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
};

+enum swap_event { /* event selector for (un)register_swap_event_notifier() */
+ SWAP_EVENT_SWAPON, /* notified from the swapon syscall */
+ SWAP_EVENT_SWAPOFF, /* notified from the swapoff syscall */
+ SWAP_EVENT_SLOT_FREE, /* notified from swap_entry_free(), per swap area */
+};
+
#define SWAP_CLUSTER_MAX 32

#define SWAP_MAP_MAX 0x3e /* Max duplication count, in first swap_map */
@@ -180,6 +184,7 @@ struct swap_info_struct {
struct swap_extent *curr_swap_extent;
struct swap_extent first_swap_extent;
struct block_device *bdev; /* swap device or bdev of swap file */
+ struct atomic_notifier_head slot_free_notify_list;
struct file *swap_file; /* seldom referenced */
unsigned int old_block_size; /* seldom referenced */
};
@@ -329,6 +334,10 @@ extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
extern int reuse_swap_page(struct page *);
extern int try_to_free_swap(struct page *);
+extern int register_swap_event_notifier(struct notifier_block *nb,
+ enum swap_event event, unsigned long val);
+extern int unregister_swap_event_notifier(struct notifier_block *nb,
+ enum swap_event event, unsigned long val);
struct backing_dev_info;

/* linux/mm/thrash.c */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c0585b..354bc9d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -57,6 +57,8 @@ static struct swap_list_t swap_list = {-1, -1};
static struct swap_info_struct *swap_info[MAX_SWAPFILES];

static DEFINE_MUTEX(swapon_mutex);
+static BLOCKING_NOTIFIER_HEAD(swapon_notify_list);
+static BLOCKING_NOTIFIER_HEAD(swapoff_notify_list);

static inline unsigned char swap_count(unsigned char ent)
{
@@ -583,6 +585,9 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
swap_list.next = p->type;
nr_swap_pages++;
p->inuse_pages--;
+ /* Currently, ramzswap is the only user of this notifier */
+ atomic_notifier_call_chain(&p->slot_free_notify_list,
+ offset, p->swap_file);
}

return usage;
@@ -1609,6 +1614,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
p->swap_map = NULL;
p->flags = 0;
spin_unlock(&swap_lock);
+ blocking_notifier_call_chain(&swapoff_notify_list, type, swap_file);
mutex_unlock(&swapon_mutex);
vfree(swap_map);
/* Destroy swap account informatin */
@@ -2022,7 +2028,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
swap_list.head = swap_list.next = type;
else
swap_info[prev]->next = type;
+ ATOMIC_INIT_NOTIFIER_HEAD(&p->slot_free_notify_list);
spin_unlock(&swap_lock);
+ blocking_notifier_call_chain(&swapon_notify_list, type, swap_file);
mutex_unlock(&swapon_mutex);
error = 0;
goto out;
@@ -2446,3 +2454,62 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
}
}
}
+
+
+/**
+ * register_swap_event_notifier - subscribe a notifier block to a swap event
+ * @nb:		notifier block to add to the chain selected by @event
+ * @event:	swap event to subscribe to
+ * @val:	for SWAP_EVENT_SLOT_FREE, the swap type (index into swap_info[])
+ *		whose slot-free chain @nb is added to; not used for the
+ *		other events
+ *
+ * SWAP_EVENT_SWAPON and SWAP_EVENT_SWAPOFF use file-global blocking chains.
+ * SWAP_EVENT_SLOT_FREE uses the atomic chain embedded in the selected
+ * swap_info_struct, which is called from swap_entry_free().
+ *
+ * Returns the result of the underlying chain registration, or -EINVAL when
+ * @event is unknown or @val does not name an existing swap area.
+ */
+int register_swap_event_notifier(struct notifier_block *nb,
+				enum swap_event event, unsigned long val)
+{
+	switch (event) {
+	case SWAP_EVENT_SWAPON:
+		return blocking_notifier_chain_register(
+					&swapon_notify_list, nb);
+	case SWAP_EVENT_SWAPOFF:
+		return blocking_notifier_chain_register(
+					&swapoff_notify_list, nb);
+	case SWAP_EVENT_SLOT_FREE:
+	{
+		struct swap_info_struct *sis;
+
+		/*
+		 * nr_swapfiles is a count, so the highest usable index is
+		 * nr_swapfiles - 1; '>' here would let val == nr_swapfiles
+		 * through and index past the last valid swap_info[] entry.
+		 */
+		if (val >= nr_swapfiles)
+			goto out;
+		sis = swap_info[val];
+		if (!sis)
+			goto out;
+		return atomic_notifier_chain_register(
+					&sis->slot_free_notify_list, nb);
+	}
+	default:
+		pr_err("Invalid swap event: %d\n", event);
+	}
+
+out:
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(register_swap_event_notifier);
+
+/**
+ * unregister_swap_event_notifier - remove a notifier block from a swap event
+ * @nb:		notifier block to remove from the chain selected by @event
+ * @event:	swap event @nb was registered for
+ * @val:	for SWAP_EVENT_SLOT_FREE, the swap type (index into swap_info[])
+ *		whose slot-free chain @nb is removed from; not used for the
+ *		other events
+ *
+ * Returns the result of the underlying chain unregistration, or -EINVAL when
+ * @event is unknown or @val does not name an existing swap area.
+ */
+int unregister_swap_event_notifier(struct notifier_block *nb,
+				enum swap_event event, unsigned long val)
+{
+	switch (event) {
+	case SWAP_EVENT_SWAPON:
+		return blocking_notifier_chain_unregister(
+					&swapon_notify_list, nb);
+	case SWAP_EVENT_SWAPOFF:
+		return blocking_notifier_chain_unregister(
+					&swapoff_notify_list, nb);
+	case SWAP_EVENT_SLOT_FREE:
+	{
+		struct swap_info_struct *sis;
+
+		/*
+		 * nr_swapfiles is a count, so the highest usable index is
+		 * nr_swapfiles - 1; '>' here would let val == nr_swapfiles
+		 * through and index past the last valid swap_info[] entry.
+		 */
+		if (val >= nr_swapfiles)
+			goto out;
+		sis = swap_info[val];
+		if (!sis)
+			goto out;
+		return atomic_notifier_chain_unregister(
+					&sis->slot_free_notify_list, nb);
+	}
+	default:
+		pr_err("Invalid swap event: %d\n", event);
+	}
+
+out:
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(unregister_swap_event_notifier);
--
1.6.2.5


2009-12-31 04:10:41

by Nitin Gupta

[permalink] [raw]
Subject: [PATCH 2/2] [mmotm v2] ramzswap: add handlers for various swap events

The SWAPON handler registers a callback which frees the memory associated
with a given swap slot, eliminating any stale data in the corresponding
ramzswap device.

v2: add calls to unregister all notifiers during module unload.

Signed-off-by: Nitin Gupta <[email protected]>
---
drivers/staging/ramzswap/ramzswap_drv.c | 73 +++++++++++++++++++++++++++++
drivers/staging/ramzswap/ramzswap_drv.h | 1 +
drivers/staging/ramzswap/ramzswap_ioctl.h | 1 +
3 files changed, 75 insertions(+), 0 deletions(-)

diff --git a/drivers/staging/ramzswap/ramzswap_drv.c b/drivers/staging/ramzswap/ramzswap_drv.c
index b839f05..fd4db0f 100644
--- a/drivers/staging/ramzswap/ramzswap_drv.c
+++ b/drivers/staging/ramzswap/ramzswap_drv.c
@@ -276,6 +276,7 @@ void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
s->failed_reads = rs->failed_reads;
s->failed_writes = rs->failed_writes;
s->invalid_io = rs->invalid_io;
+ s->notify_free = rs->notify_free;
s->pages_zero = rs->pages_zero;

s->good_compress_pct = good_compress_perc;
@@ -1355,6 +1356,51 @@ static void create_device(struct ramzswap *rzs, int device_id)

rzs->init_done = 0;
}
+/*
+ * Slot-free notifier callback.
+ *
+ * @index is the value swap_entry_free() passes to the chain (the freed
+ * slot's offset) and @swap_file is the swap area's struct file, whose
+ * private_data was pointed at the ramzswap device by
+ * ramzswap_swapon_notify(). Frees the page stored for that slot and
+ * bumps the notify_free statistic. Always returns 0.
+ */
+static int ramzswap_slot_free_notify(struct notifier_block *self,
+			unsigned long index, void *swap_file)
+{
+	struct file *file = swap_file;
+	struct ramzswap *rzs = file->private_data;
+
+	ramzswap_free_page(rzs, index);
+	stat_inc(rzs->stats.notify_free);
+
+	return 0;
+}
+
+static struct notifier_block ramzswap_slot_free_nb = { /* registered for SWAP_EVENT_SLOT_FREE by ramzswap_swapon_notify() */
+ .notifier_call = ramzswap_slot_free_notify
+};
+
+/*
+ * Swapon notifier callback.
+ *
+ * @swap_id is the swap type of the area being enabled and @swap_file is
+ * its struct file. For a swap area backed by a ramzswap device, the
+ * device is cached in file->private_data (read back by
+ * ramzswap_slot_free_notify()) and the slot-free notifier is registered
+ * on that swap area.
+ *
+ * Returns 0 when the swap area is not ours or on success, otherwise the
+ * error from register_swap_event_notifier().
+ */
+static int ramzswap_swapon_notify(struct notifier_block *self,
+			unsigned long swap_id, void *swap_file)
+{
+	int ret = 0;
+	struct block_device *bdev;
+	struct file *file;
+	struct inode *inode;
+	struct ramzswap *rzs;
+
+	file = (struct file *)swap_file;
+	inode = file->f_mapping->host;
+
+	/*
+	 * The swapon chain is global, so this callback runs for every
+	 * swapon, including regular swap files and foreign block devices.
+	 * I_BDEV() is only meaningful for block device inodes, and only our
+	 * own disks carry a struct ramzswap in private_data, so bail out
+	 * for anything else.
+	 */
+	if (!S_ISBLK(inode->i_mode))
+		return 0;
+	bdev = I_BDEV(inode);
+	if (!bdev->bd_disk || bdev->bd_disk->major != ramzswap_major)
+		return 0;
+
+	/* cache ramzswap struct associated with this swap_file */
+	rzs = bdev->bd_disk->private_data;
+	file->private_data = rzs;
+
+	/*
+	 * NOTE(review): ramzswap_slot_free_nb is a single static block that
+	 * gets added to one chain per swap area. With more than one
+	 * ramzswap swap device active it would sit on several chains at
+	 * once - confirm whether a per-device notifier block is needed.
+	 */
+	ret = register_swap_event_notifier(&ramzswap_slot_free_nb,
+				SWAP_EVENT_SLOT_FREE, swap_id);
+	if (ret)
+		pr_err("Error registering swap free notifier\n");
+	return ret;
+}
+
+/*
+ * Swapoff notifier callback: drops the slot-free notifier from the swap
+ * area identified by @swap_id. The unregister result is not checked and
+ * the callback always returns 0.
+ */
+static int ramzswap_swapoff_notify(struct notifier_block *self,
+			unsigned long swap_id, void *swap_file)
+{
+	struct notifier_block *slot_free_nb = &ramzswap_slot_free_nb;
+
+	unregister_swap_event_notifier(slot_free_nb, SWAP_EVENT_SLOT_FREE,
+				swap_id);
+
+	return 0;
+}

static void destroy_device(struct ramzswap *rzs)
{
@@ -1367,6 +1413,14 @@ static void destroy_device(struct ramzswap *rzs)
blk_cleanup_queue(rzs->queue);
}

+static struct notifier_block ramzswap_swapon_nb = { /* registered for SWAP_EVENT_SWAPON in ramzswap_init() */
+ .notifier_call = ramzswap_swapon_notify
+};
+
+static struct notifier_block ramzswap_swapoff_nb = { /* registered for SWAP_EVENT_SWAPOFF in ramzswap_init() */
+ .notifier_call = ramzswap_swapoff_notify
+};
+
static int __init ramzswap_init(void)
{
int i, ret;
@@ -1399,6 +1453,20 @@ static int __init ramzswap_init(void)
for (i = 0; i < num_devices; i++)
create_device(&devices[i], i);

+ ret = register_swap_event_notifier(&ramzswap_swapon_nb,
+ SWAP_EVENT_SWAPON, 0);
+ if (ret) {
+ pr_err("Error registering swapon notifier\n");
+ goto out;
+ }
+
+ ret = register_swap_event_notifier(&ramzswap_swapoff_nb,
+ SWAP_EVENT_SWAPOFF, 0);
+ if (ret) {
+ pr_err("Error registering swapoff notifier\n");
+ goto out;
+ }
+
return 0;
out:
unregister_blkdev(ramzswap_major, "ramzswap");
@@ -1410,6 +1478,11 @@ static void __exit ramzswap_exit(void)
int i;
struct ramzswap *rzs;

+ unregister_swap_event_notifier(&ramzswap_swapon_nb,
+ SWAP_EVENT_SWAPON, 0);
+ unregister_swap_event_notifier(&ramzswap_swapoff_nb,
+ SWAP_EVENT_SWAPOFF, 0);
+
for (i = 0; i < num_devices; i++) {
rzs = &devices[i];

diff --git a/drivers/staging/ramzswap/ramzswap_drv.h b/drivers/staging/ramzswap/ramzswap_drv.h
index a6ea240..adc841a 100644
--- a/drivers/staging/ramzswap/ramzswap_drv.h
+++ b/drivers/staging/ramzswap/ramzswap_drv.h
@@ -124,6 +124,7 @@ struct ramzswap_stats {
u64 failed_reads; /* can happen when memory is too low */
u64 failed_writes; /* should NEVER! happen */
u64 invalid_io; /* non-swap I/O requests */
+ u64 notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 pages_stored; /* no. of pages currently stored */
u32 good_compress; /* % of pages with compression ratio<=50% */
diff --git a/drivers/staging/ramzswap/ramzswap_ioctl.h b/drivers/staging/ramzswap/ramzswap_ioctl.h
index c713a09..ec50416 100644
--- a/drivers/staging/ramzswap/ramzswap_ioctl.h
+++ b/drivers/staging/ramzswap/ramzswap_ioctl.h
@@ -27,6 +27,7 @@ struct ramzswap_ioctl_stats {
u64 failed_reads; /* can happen when memory is too low */
u64 failed_writes; /* should NEVER! happen */
u64 invalid_io; /* non-swap I/O requests */
+ u64 notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 good_compress_pct; /* no. of pages with compression ratio<=50% */
u32 pages_expand_pct; /* no. of incompressible pages */
--
1.6.2.5

2009-12-31 09:04:14

by Pekka Enberg

[permalink] [raw]
Subject: Re: [PATCH 1/2] [mmotm v2] Add notifiers for various swap events

Nitin Gupta wrote:
> Events:
> - Swapon
> - Swapoff
> - When a swap slot is freed
>
> This is required for ramzswap module which implements RAM based block
> devices to be used as swap disks. These devices require a notification
> on these events to function properly.
>
> Currently, I'm not sure if any of these event notifiers have any other
> users. However, adding ramzswap specific hooks instead of this generic
> approach resulted in a bad/hacky code with too many 'ifdef CONFIG_RAMZSWAP'
> spread all over the core kernel code.
>
> For SWAP_EVENT_SLOT_FREE, callbacks are made under swap_lock. Currently, this
> is not a problem since ramzswap is the only user and the callback it registers
> can be safely made under this lock. However, if this event finds more users,
> we might have to work on reducing contention on this lock (per-swap lock?).
>
> v2: added comment before swap slot free notifier call to clarify that
> ramzswap is currently the only user of this notifier.
>
> Signed-off-by: Nitin Gupta <[email protected]>

Andi didn't seem to like this one and we're still waiting for Hugh to
comment on the issue. But, FWIW,

Acked-by: Pekka Enberg <[email protected]>

Pekka

2009-12-31 09:05:25

by Pekka Enberg

[permalink] [raw]
Subject: Re: [PATCH 2/2] [mmotm v2] ramzswap: add handlers for various swap events

Nitin Gupta wrote:
> The SWAPON handler sets callback which frees memory associated
> with given swap slot, eliminating any stale data in corresponding
> ramzswap device.

I guess this changelog is too terse and should explain _why_ we want
this in more detail.

Pekka

2010-01-01 05:10:42

by Nitin Gupta

[permalink] [raw]
Subject: [PATCH 2/2 repost] [mmotm v2] Add notifiers for various swap events

(reposting with detailed description, no code changes)

The ramzswap driver creates RAM backed block devices which are
used as swap disks. Pages swapped to these disks are compressed
and stored in memory itself. However, when a swap page becomes
stale i.e. it is no longer referenced by any process (say, when
the owning process exits), the driver does not get any notification
about this. So, it has to keep such pages in memory until the kernel
swaps to the same swap slot again, thereby overwriting the previous
(stale) page.

Often, a large number of such stale pages accumulate which defeats
the whole purpose of in-memory compressed swapping and it begins
to have a negative impact on system performance.

To overcome this problem, we now register a notification for the
event when a swap slot is no longer used. The registered callback
immediately frees corresponding memory, eliminating any stale data
in (compressed) memory.

v2: add calls to unregister all notifiers during module unload.

Signed-off-by: Nitin Gupta <[email protected]>
---
drivers/staging/ramzswap/ramzswap_drv.c | 73 +++++++++++++++++++++++++++++
drivers/staging/ramzswap/ramzswap_drv.h | 1 +
drivers/staging/ramzswap/ramzswap_ioctl.h | 1 +
3 files changed, 75 insertions(+), 0 deletions(-)

diff --git a/drivers/staging/ramzswap/ramzswap_drv.c b/drivers/staging/ramzswap/ramzswap_drv.c
index b839f05..fd4db0f 100644
--- a/drivers/staging/ramzswap/ramzswap_drv.c
+++ b/drivers/staging/ramzswap/ramzswap_drv.c
@@ -276,6 +276,7 @@ void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
s->failed_reads = rs->failed_reads;
s->failed_writes = rs->failed_writes;
s->invalid_io = rs->invalid_io;
+ s->notify_free = rs->notify_free;
s->pages_zero = rs->pages_zero;

s->good_compress_pct = good_compress_perc;
@@ -1355,6 +1356,51 @@ static void create_device(struct ramzswap *rzs, int device_id)

rzs->init_done = 0;
}
+/*
+ * Slot-free notifier callback.
+ *
+ * @index is the value swap_entry_free() passes to the chain (the freed
+ * slot's offset) and @swap_file is the swap area's struct file, whose
+ * private_data was pointed at the ramzswap device by
+ * ramzswap_swapon_notify(). Frees the page stored for that slot and
+ * bumps the notify_free statistic. Always returns 0.
+ */
+static int ramzswap_slot_free_notify(struct notifier_block *self,
+			unsigned long index, void *swap_file)
+{
+	struct file *file = swap_file;
+	struct ramzswap *rzs = file->private_data;
+
+	ramzswap_free_page(rzs, index);
+	stat_inc(rzs->stats.notify_free);
+
+	return 0;
+}
+
+static struct notifier_block ramzswap_slot_free_nb = { /* registered for SWAP_EVENT_SLOT_FREE by ramzswap_swapon_notify() */
+ .notifier_call = ramzswap_slot_free_notify
+};
+
+/*
+ * Swapon notifier callback.
+ *
+ * @swap_id is the swap type of the area being enabled and @swap_file is
+ * its struct file. For a swap area backed by a ramzswap device, the
+ * device is cached in file->private_data (read back by
+ * ramzswap_slot_free_notify()) and the slot-free notifier is registered
+ * on that swap area.
+ *
+ * Returns 0 when the swap area is not ours or on success, otherwise the
+ * error from register_swap_event_notifier().
+ */
+static int ramzswap_swapon_notify(struct notifier_block *self,
+			unsigned long swap_id, void *swap_file)
+{
+	int ret = 0;
+	struct block_device *bdev;
+	struct file *file;
+	struct inode *inode;
+	struct ramzswap *rzs;
+
+	file = (struct file *)swap_file;
+	inode = file->f_mapping->host;
+
+	/*
+	 * The swapon chain is global, so this callback runs for every
+	 * swapon, including regular swap files and foreign block devices.
+	 * I_BDEV() is only meaningful for block device inodes, and only our
+	 * own disks carry a struct ramzswap in private_data, so bail out
+	 * for anything else.
+	 */
+	if (!S_ISBLK(inode->i_mode))
+		return 0;
+	bdev = I_BDEV(inode);
+	if (!bdev->bd_disk || bdev->bd_disk->major != ramzswap_major)
+		return 0;
+
+	/* cache ramzswap struct associated with this swap_file */
+	rzs = bdev->bd_disk->private_data;
+	file->private_data = rzs;
+
+	/*
+	 * NOTE(review): ramzswap_slot_free_nb is a single static block that
+	 * gets added to one chain per swap area. With more than one
+	 * ramzswap swap device active it would sit on several chains at
+	 * once - confirm whether a per-device notifier block is needed.
+	 */
+	ret = register_swap_event_notifier(&ramzswap_slot_free_nb,
+				SWAP_EVENT_SLOT_FREE, swap_id);
+	if (ret)
+		pr_err("Error registering swap free notifier\n");
+	return ret;
+}
+
+/*
+ * Swapoff notifier callback: drops the slot-free notifier from the swap
+ * area identified by @swap_id. The unregister result is not checked and
+ * the callback always returns 0.
+ */
+static int ramzswap_swapoff_notify(struct notifier_block *self,
+			unsigned long swap_id, void *swap_file)
+{
+	struct notifier_block *slot_free_nb = &ramzswap_slot_free_nb;
+
+	unregister_swap_event_notifier(slot_free_nb, SWAP_EVENT_SLOT_FREE,
+				swap_id);
+
+	return 0;
+}

static void destroy_device(struct ramzswap *rzs)
{
@@ -1367,6 +1413,14 @@ static void destroy_device(struct ramzswap *rzs)
blk_cleanup_queue(rzs->queue);
}

+static struct notifier_block ramzswap_swapon_nb = { /* registered for SWAP_EVENT_SWAPON in ramzswap_init() */
+ .notifier_call = ramzswap_swapon_notify
+};
+
+static struct notifier_block ramzswap_swapoff_nb = { /* registered for SWAP_EVENT_SWAPOFF in ramzswap_init() */
+ .notifier_call = ramzswap_swapoff_notify
+};
+
static int __init ramzswap_init(void)
{
int i, ret;
@@ -1399,6 +1453,20 @@ static int __init ramzswap_init(void)
for (i = 0; i < num_devices; i++)
create_device(&devices[i], i);

+ ret = register_swap_event_notifier(&ramzswap_swapon_nb,
+ SWAP_EVENT_SWAPON, 0);
+ if (ret) {
+ pr_err("Error registering swapon notifier\n");
+ goto out;
+ }
+
+ ret = register_swap_event_notifier(&ramzswap_swapoff_nb,
+ SWAP_EVENT_SWAPOFF, 0);
+ if (ret) {
+ pr_err("Error registering swapoff notifier\n");
+ goto out;
+ }
+
return 0;
out:
unregister_blkdev(ramzswap_major, "ramzswap");
@@ -1410,6 +1478,11 @@ static void __exit ramzswap_exit(void)
int i;
struct ramzswap *rzs;

+ unregister_swap_event_notifier(&ramzswap_swapon_nb,
+ SWAP_EVENT_SWAPON, 0);
+ unregister_swap_event_notifier(&ramzswap_swapoff_nb,
+ SWAP_EVENT_SWAPOFF, 0);
+
for (i = 0; i < num_devices; i++) {
rzs = &devices[i];

diff --git a/drivers/staging/ramzswap/ramzswap_drv.h b/drivers/staging/ramzswap/ramzswap_drv.h
index a6ea240..adc841a 100644
--- a/drivers/staging/ramzswap/ramzswap_drv.h
+++ b/drivers/staging/ramzswap/ramzswap_drv.h
@@ -124,6 +124,7 @@ struct ramzswap_stats {
u64 failed_reads; /* can happen when memory is too low */
u64 failed_writes; /* should NEVER! happen */
u64 invalid_io; /* non-swap I/O requests */
+ u64 notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 pages_stored; /* no. of pages currently stored */
u32 good_compress; /* % of pages with compression ratio<=50% */
diff --git a/drivers/staging/ramzswap/ramzswap_ioctl.h b/drivers/staging/ramzswap/ramzswap_ioctl.h
index c713a09..ec50416 100644
--- a/drivers/staging/ramzswap/ramzswap_ioctl.h
+++ b/drivers/staging/ramzswap/ramzswap_ioctl.h
@@ -27,6 +27,7 @@ struct ramzswap_ioctl_stats {
u64 failed_reads; /* can happen when memory is too low */
u64 failed_writes; /* should NEVER! happen */
u64 invalid_io; /* non-swap I/O requests */
+ u64 notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 good_compress_pct; /* no. of pages with compression ratio<=50% */
u32 pages_expand_pct; /* no. of incompressible pages */
--
1.6.2.5