The snapshot-merge target allows a snapshot to be merged back into the
snapshot's origin device.
One expected use of snapshot merging is the rollback of a root
filesystem after system upgrades (e.g. 'yum update'). snapshot-merge
enables "system rollback" support for any filesystem that is using the
associated DM/LVM devices.
* "[PATCH v4 05/13] dm snapshot: add snapshot-merge target" adds further
documentation to: Documentation/device-mapper/snapshot.txt
* All patch headers have been revised/audited for added clarity.
* Both the latest handover patch (v10) and the origin-write patch have
been included in this series.
* Aside from snapshot-merge-specific testing, these patches have
survived 75+ iterations of lvm2's testsuite without any failures.
The test kernel was 2.6.32-rc8 and lvm2 was 2.02.55 (with udev_sync
enabled and dmeventd disabled).
The snapshot-merge quilt tree is maintained here:
http://people.redhat.com/msnitzer/patches/snapshot-merge/kernel/2.6.33/
For LVM2 support please see:
http://people.redhat.com/msnitzer/patches/snapshot-merge/lvm2/LVM2-2.02.55/
Mike Snitzer (3):
dm snapshot: allow live exception store handover between tables
dm exception store: snapshot-merge usage accounting
dm snapshot: merge a linear region of chunks using one large IO
Mikulas Patocka (10):
dm snapshot: rework writing to snapshot origin
dm exception store: add snapshot-merge specific methods
dm snapshot: add snapshot-merge target
dm snapshot: merge target should not allocate new exceptions
dm snapshot: do not allow more than one merging snapshot
dm snapshot: the merge procedure
dm snapshot: queue writes to an area that is actively being merged
dm snapshot: do not merge a chunk until active writes to it finish
dm snapshot: make exceptions in other snapshots when merging
dm snapshot: redirect accesses to origin if merging snap invalidated
Documentation/device-mapper/snapshot.txt | 52 ++-
drivers/md/dm-exception-store.h | 27 +
drivers/md/dm-snap-persistent.c | 91 ++++-
drivers/md/dm-snap.c | 827 ++++++++++++++++++++++++------
4 files changed, 839 insertions(+), 158 deletions(-)
Permit in-use snapshot exception data to be 'handed over' from one
snapshot instance to another. This is a prerequisite for patches
that allow the changes made in a snapshot device to be merged back into
its origin device and also allows device resizing.
The basic call sequence is:
dmsetup load new_snapshot (referencing the existing in-use cow device)
- the ctr code detects that the cow is already in use and allows the
two snapshot target instances to be linked together
dmsetup suspend original_snapshot
dmsetup resume new_snapshot
- the new_snapshot becomes live, and if anything now tries to access
the original one it will receive -EIO
dmsetup remove original_snapshot
(There can only be two snapshot targets referencing the same cow device
simultaneously.)
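To make the sequence concrete, here is a hypothetical invocation (the
device names, sizes and table parameters are illustrative only, not
taken from this series); both tables must reference the same cow device:

  dmsetup create new_snapshot --notable
  dmsetup load new_snapshot --table "0 2097152 snapshot 254:11 254:12 P 16"
  dmsetup suspend original_snapshot
  dmsetup resume new_snapshot
  dmsetup remove original_snapshot

Here 254:12 stands in for the cow device already in use by
original_snapshot, which is what lets the constructor link the two
target instances together.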
Signed-off-by: Mike Snitzer <[email protected]>
Signed-off-by: Mikulas Patocka <[email protected]>
---
drivers/md/dm-snap.c | 258 ++++++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 231 insertions(+), 27 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c6c26a3..60bfefb 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -303,22 +303,111 @@ static void __insert_origin(struct origin *o)
}
/*
+ * _origins_lock must be held when calling this function.
+ * Returns number of snapshots registered using the supplied cow device, plus:
+ * snap_src - a snapshot suitable for use as a source of exception handover
+ * snap_dest - a snapshot capable of receiving exception handover.
+ *
+ * Possible return values and states:
+ * 0: NULL, NULL - first new snapshot
+ * 1: snap_src, NULL - normal snapshot
+ * 2: snap_src, snap_dest - waiting for handover
+ * 2: snap_src, NULL - handed over, waiting for old to be deleted
+ * 1: NULL, snap_dest - source got destroyed without handover
+ */
+static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
+ struct dm_snapshot **snap_src,
+ struct dm_snapshot **snap_dest)
+{
+ struct dm_snapshot *s;
+ struct origin *o;
+ int count = 0;
+ int active;
+
+ o = __lookup_origin(snap->origin->bdev);
+ if (!o)
+ goto out;
+
+ list_for_each_entry(s, &o->snapshots, list) {
+ if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
+ continue;
+
+ down_read(&s->lock);
+ active = s->active;
+ up_read(&s->lock);
+
+ if (active) {
+ if (snap_src)
+ *snap_src = s;
+ } else if (snap_dest)
+ *snap_dest = s;
+
+ count++;
+ }
+
+out:
+ return count;
+}
+
+static int __validate_exception_handover(struct dm_snapshot *snap)
+{
+ struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+ int r = 0;
+
+ /* Does snapshot need exceptions handed over to it? */
+ if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest) == 2) ||
+ snap_dest) {
+ snap->ti->error = "Snapshot cow pairing for exception "
+ "table handover failed";
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (snap_src)
+ r = 1;
+
+out:
+ return r;
+}
+
+static void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
+{
+ struct dm_snapshot *l;
+
+ /* Sort the list according to chunk size, largest-first smallest-last */
+ list_for_each_entry(l, &o->snapshots, list)
+ if (l->store->chunk_size < s->store->chunk_size)
+ break;
+ list_add_tail(&s->list, &l->list);
+}
+
+/*
* Make a note of the snapshot and its origin so we can look it
* up when the origin has a write on it.
+ *
+ * Also validate snapshot exception store handovers.
+ * On success, returns 1 if this registration is a handover destination,
+ * otherwise returns 0.
*/
static int register_snapshot(struct dm_snapshot *snap)
{
- struct dm_snapshot *l;
- struct origin *o, *new_o;
+ struct origin *o, *new_o = NULL;
struct block_device *bdev = snap->origin->bdev;
+ int r = 0;
new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
if (!new_o)
return -ENOMEM;
down_write(&_origins_lock);
- o = __lookup_origin(bdev);
+ r = __validate_exception_handover(snap);
+ if (r < 0) {
+ kfree(new_o);
+ goto out;
+ }
+
+ o = __lookup_origin(bdev);
if (o)
kfree(new_o);
else {
@@ -332,14 +421,27 @@ static int register_snapshot(struct dm_snapshot *snap)
__insert_origin(o);
}
- /* Sort the list according to chunk size, largest-first smallest-last */
- list_for_each_entry(l, &o->snapshots, list)
- if (l->store->chunk_size < snap->store->chunk_size)
- break;
- list_add_tail(&snap->list, &l->list);
+ __insert_snapshot(o, snap);
+
+out:
+ up_write(&_origins_lock);
+
+ return r;
+}
+
+/*
+ * Move snapshot to correct place in list according to chunk size.
+ */
+static void reregister_snapshot(struct dm_snapshot *s)
+{
+ struct block_device *bdev = s->origin->bdev;
+
+ down_write(&_origins_lock);
+
+ list_del(&s->list);
+ __insert_snapshot(__lookup_origin(bdev), s);
up_write(&_origins_lock);
- return 0;
}
static void unregister_snapshot(struct dm_snapshot *s)
@@ -350,7 +452,7 @@ static void unregister_snapshot(struct dm_snapshot *s)
o = __lookup_origin(s->origin->bdev);
list_del(&s->list);
- if (list_empty(&o->snapshots)) {
+ if (o && list_empty(&o->snapshots)) {
list_del(&o->hash_list);
kfree(o);
}
@@ -660,6 +762,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->suspended = 0;
atomic_set(&s->pending_exceptions_count, 0);
init_rwsem(&s->lock);
+ INIT_LIST_HEAD(&s->list);
spin_lock_init(&s->pe_lock);
/* Allocate hash table for COW data */
@@ -694,39 +797,55 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&s->tracked_chunk_lock);
- /* Metadata must only be loaded into one table at once */
+ bio_list_init(&s->queued_bios);
+ INIT_WORK(&s->queued_bios_work, flush_queued_bios);
+
+ ti->private = s;
+ ti->num_flush_requests = 1;
+
+ /* Add snapshot to the list of snapshots for this origin */
+ /* Exceptions aren't triggered till snapshot_resume() is called */
+ r = register_snapshot(s);
+ if (r == -ENOMEM) {
+ ti->error = "Snapshot origin struct allocation failed";
+ goto bad_load_and_register;
+ } else if (r < 0) {
+ /* invalid handover, register_snapshot has set ti->error */
+ goto bad_load_and_register;
+ }
+
+ /*
+ * Metadata must only be loaded into one table at once, so skip this
+ * if metadata will be handed over during resume.
+ * Chunk size will be set during the handover - set it to zero to
+ * ensure it's ignored.
+ */
+ if (r > 0) {
+ s->store->chunk_size = 0;
+ return 0;
+ }
+
r = s->store->type->read_metadata(s->store, dm_add_exception,
(void *)s);
if (r < 0) {
ti->error = "Failed to read snapshot metadata";
- goto bad_load_and_register;
+ goto bad_read_metadata;
} else if (r > 0) {
s->valid = 0;
DMWARN("Snapshot is marked invalid.");
}
- bio_list_init(&s->queued_bios);
- INIT_WORK(&s->queued_bios_work, flush_queued_bios);
-
if (!s->store->chunk_size) {
ti->error = "Chunk size not set";
- goto bad_load_and_register;
- }
-
- /* Add snapshot to the list of snapshots for this origin */
- /* Exceptions aren't triggered till snapshot_resume() is called */
- if (register_snapshot(s)) {
- r = -EINVAL;
- ti->error = "Cannot register snapshot origin";
- goto bad_load_and_register;
+ goto bad_read_metadata;
}
-
- ti->private = s;
ti->split_io = s->store->chunk_size;
- ti->num_flush_requests = 1;
return 0;
+bad_read_metadata:
+ unregister_snapshot(s);
+
bad_load_and_register:
mempool_destroy(s->tracked_chunk_pool);
@@ -765,15 +884,58 @@ static void __free_exceptions(struct dm_snapshot *s)
dm_exception_table_exit(&s->complete, exception_cache);
}
+static void __handover_exceptions(struct dm_snapshot *snap_src,
+ struct dm_snapshot *snap_dest)
+{
+ union {
+ struct dm_exception_table table_swap;
+ struct dm_exception_store *store_swap;
+ } u;
+
+ /*
+ * Swap all snapshot context information between the two instances.
+ */
+ u.table_swap = snap_dest->complete;
+ snap_dest->complete = snap_src->complete;
+ snap_src->complete = u.table_swap;
+
+ u.store_swap = snap_dest->store;
+ snap_dest->store = snap_src->store;
+ snap_src->store = u.store_swap;
+
+ snap_dest->store->snap = snap_dest;
+ snap_src->store->snap = snap_src;
+
+ snap_dest->ti->split_io = snap_dest->store->chunk_size;
+ snap_dest->valid = snap_src->valid;
+
+ /*
+ * Set source invalid to ensure it receives no further I/O.
+ */
+ snap_src->valid = 0;
+}
+
static void snapshot_dtr(struct dm_target *ti)
{
#ifdef CONFIG_DM_DEBUG
int i;
#endif
struct dm_snapshot *s = ti->private;
+ struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
flush_workqueue(ksnapd);
+ down_read(&_origins_lock);
+ /* Check whether exception handover must be cancelled */
+ (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
+ if (snap_src && snap_dest && (s == snap_src)) {
+ down_write(&snap_dest->lock);
+ snap_dest->valid = 0;
+ up_write(&snap_dest->lock);
+ DMERR("Cancelling snapshot handover.");
+ }
+ up_read(&_origins_lock);
+
/* Prevent further origin writes from using this snapshot. */
/* After this returns there can be no new kcopyd jobs. */
unregister_snapshot(s);
@@ -1186,9 +1348,50 @@ static void snapshot_postsuspend(struct dm_target *ti)
up_write(&s->lock);
}
+static int snapshot_preresume(struct dm_target *ti)
+{
+ int r = 0;
+ struct dm_snapshot *s = ti->private;
+ struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+
+ down_read(&_origins_lock);
+ (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
+ if (snap_src && snap_dest) {
+ down_read(&snap_src->lock);
+ if (s == snap_src) {
+ DMERR("Unable to resume snapshot source until "
+ "handover completes.");
+ r = -EINVAL;
+ } else if (!snap_src->suspended) {
+ DMERR("Unable to perform snapshot handover until "
+ "source is suspended.");
+ r = -EINVAL;
+ }
+ up_read(&snap_src->lock);
+ }
+ up_read(&_origins_lock);
+
+ return r;
+}
+
static void snapshot_resume(struct dm_target *ti)
{
struct dm_snapshot *s = ti->private;
+ struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+
+ down_read(&_origins_lock);
+ (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest);
+ if (snap_src && snap_dest) {
+ down_write(&snap_src->lock);
+ down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
+ __handover_exceptions(snap_src, snap_dest);
+ up_write(&snap_dest->lock);
+ up_write(&snap_src->lock);
+ }
+ up_read(&_origins_lock);
+
+ /* Now we have correct chunk size, reregister */
+ reregister_snapshot(s);
down_write(&s->lock);
s->active = 1;
@@ -1506,6 +1709,7 @@ static struct target_type snapshot_target = {
.map = snapshot_map,
.end_io = snapshot_end_io,
.postsuspend = snapshot_postsuspend,
+ .preresume = snapshot_preresume,
.resume = snapshot_resume,
.status = snapshot_status,
.iterate_devices = snapshot_iterate_devices,
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
The previous code selected one exception as "primary_pe", linked all
other exceptions to it and used reference counting to wait until all
exceptions were reallocated.
All the complexity of exception linking and reference counting has been
removed. Now a bio is linked to a single exception; when that exception
is reallocated, the bio is retried and may then wait on exceptions in
other snapshots.
The new __origin_write() interface gives the snapshot-merge support the
ability to trigger exceptions in other snapshots without needing an
associated bio (which snapshot-merge does not generate). As such, this
patch is a prerequisite for snapshot-merge.
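For illustration (scenario assumed, not from the patch): if an origin
write hits a chunk that is not yet remapped in snapshots A, B and C,
the bio is queued on the first pending exception created (say A's),
but A's copy is deliberately started last. When A's exception
completes, the bio is re-submitted through do_origin(); by then B's
and C's exceptions have most likely completed too, so the bio proceeds
to the origin, otherwise it queues again on whichever exception is
still pending.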
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 165 +++++++++++++++++--------------------------------
1 files changed, 57 insertions(+), 108 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 60bfefb..ad95039 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -137,28 +137,6 @@ struct dm_snap_pending_exception {
struct bio_list origin_bios;
struct bio_list snapshot_bios;
- /*
- * Short-term queue of pending exceptions prior to submission.
- */
- struct list_head list;
-
- /*
- * The primary pending_exception is the one that holds
- * the ref_count and the list of origin_bios for a
- * group of pending_exceptions. It is always last to get freed.
- * These fields get set up when writing to the origin.
- */
- struct dm_snap_pending_exception *primary_pe;
-
- /*
- * Number of pending_exceptions processing this chunk.
- * When this drops to zero we must complete the origin bios.
- * If incrementing or decrementing this, hold pe->snap->lock for
- * the sibling concerned and not pe->primary_pe->snap->lock unless
- * they are the same.
- */
- atomic_t ref_count;
-
/* Pointer back to snapshot context */
struct dm_snapshot *snap;
@@ -997,6 +975,28 @@ static void flush_queued_bios(struct work_struct *work)
flush_bios(queued_bios);
}
+static int do_origin(struct dm_dev *origin, struct bio *bio);
+
+/*
+ * Retry a list of origin bios: re-run the origin mapping for each.
+ */
+static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
+{
+ struct bio *n;
+ int r;
+
+ while (bio) {
+ n = bio->bi_next;
+ bio->bi_next = NULL;
+ r = do_origin(s->origin, bio);
+ if (r == DM_MAPIO_REMAPPED)
+ generic_make_request(bio);
+ else
+ BUG_ON(r != DM_MAPIO_SUBMITTED);
+ bio = n;
+ }
+}
+
/*
* Error a list of buffers.
*/
@@ -1030,39 +1030,6 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
dm_table_event(s->ti->table);
}
-static void get_pending_exception(struct dm_snap_pending_exception *pe)
-{
- atomic_inc(&pe->ref_count);
-}
-
-static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
-{
- struct dm_snap_pending_exception *primary_pe;
- struct bio *origin_bios = NULL;
-
- primary_pe = pe->primary_pe;
-
- /*
- * If this pe is involved in a write to the origin and
- * it is the last sibling to complete then release
- * the bios for the original write to the origin.
- */
- if (primary_pe &&
- atomic_dec_and_test(&primary_pe->ref_count)) {
- origin_bios = bio_list_get(&primary_pe->origin_bios);
- free_pending_exception(primary_pe);
- }
-
- /*
- * Free the pe if it's not linked to an origin write or if
- * it's not itself a primary pe.
- */
- if (!primary_pe || primary_pe != pe)
- free_pending_exception(pe);
-
- return origin_bios;
-}
-
static void pending_complete(struct dm_snap_pending_exception *pe, int success)
{
struct dm_exception *e;
@@ -1111,7 +1078,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
out:
dm_remove_exception(&pe->e);
snapshot_bios = bio_list_get(&pe->snapshot_bios);
- origin_bios = put_pending_exception(pe);
+ origin_bios = bio_list_get(&pe->origin_bios);
+ free_pending_exception(pe);
up_write(&s->lock);
@@ -1121,7 +1089,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
else
flush_bios(snapshot_bios);
- flush_bios(origin_bios);
+ retry_origin_bios(s, origin_bios);
}
static void commit_callback(void *context, int success)
@@ -1208,8 +1176,6 @@ __find_pending_exception(struct dm_snapshot *s,
pe->e.old_chunk = chunk;
bio_list_init(&pe->origin_bios);
bio_list_init(&pe->snapshot_bios);
- pe->primary_pe = NULL;
- atomic_set(&pe->ref_count, 0);
pe->started = 0;
if (s->store->type->prepare_exception(s->store, &pe->e)) {
@@ -1217,7 +1183,6 @@ __find_pending_exception(struct dm_snapshot *s,
return NULL;
}
- get_pending_exception(pe);
dm_insert_exception(&s->pending, &pe->e);
return pe;
@@ -1458,14 +1423,21 @@ static int snapshot_iterate_devices(struct dm_target *ti,
/*-----------------------------------------------------------------
* Origin methods
*---------------------------------------------------------------*/
-static int __origin_write(struct list_head *snapshots, struct bio *bio)
+
+/*
+ * Returns:
+ * DM_MAPIO_REMAPPED: bio may be submitted to origin device
+ * DM_MAPIO_SUBMITTED: bio was queued on the queue of one of the pending exceptions
+ */
+
+static int __origin_write(struct list_head *snapshots,
+ sector_t sector, struct bio *bio)
{
- int r = DM_MAPIO_REMAPPED, first = 0;
+ int r = DM_MAPIO_REMAPPED;
struct dm_snapshot *snap;
struct dm_exception *e;
- struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
+ struct dm_snap_pending_exception *pe, *pe_to_start = NULL;
chunk_t chunk;
- LIST_HEAD(pe_queue);
/* Do all the snapshots on this origin */
list_for_each_entry (snap, snapshots, list) {
@@ -1477,22 +1449,19 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
goto next_snapshot;
/* Nothing to do if writing beyond end of snapshot */
- if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
+ if (sector >= dm_table_get_size(snap->ti->table))
goto next_snapshot;
/*
* Remember, different snapshots can have
* different chunk sizes.
*/
- chunk = sector_to_chunk(snap->store, bio->bi_sector);
+ chunk = sector_to_chunk(snap->store, sector);
/*
* Check exception table to see if block
* is already remapped in this snapshot
* and trigger an exception if not.
- *
- * ref_count is initialised to 1 so pending_complete()
- * won't destroy the primary_pe while we're inside this loop.
*/
e = dm_lookup_exception(&snap->complete, chunk);
if (e)
@@ -1522,59 +1491,39 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
}
}
- if (!primary_pe) {
- /*
- * Either every pe here has same
- * primary_pe or none has one yet.
- */
- if (pe->primary_pe)
- primary_pe = pe->primary_pe;
- else {
- primary_pe = pe;
- first = 1;
- }
-
- bio_list_add(&primary_pe->origin_bios, bio);
+ r = DM_MAPIO_SUBMITTED;
- r = DM_MAPIO_SUBMITTED;
- }
+ if (bio) {
+ bio_list_add(&pe->origin_bios, bio);
+ bio = NULL;
- if (!pe->primary_pe) {
- pe->primary_pe = primary_pe;
- get_pending_exception(primary_pe);
+ if (!pe->started) {
+ pe->started = 1;
+ pe_to_start = pe;
+ }
}
if (!pe->started) {
pe->started = 1;
- list_add_tail(&pe->list, &pe_queue);
+ start_copy(pe);
}
next_snapshot:
up_write(&snap->lock);
}
- if (!primary_pe)
- return r;
-
/*
- * If this is the first time we're processing this chunk and
- * ref_count is now 1 it means all the pending exceptions
- * got completed while we were in the loop above, so it falls to
- * us here to remove the primary_pe and submit any origin_bios.
+ * pe_to_start is a small performance improvement:
+ * to avoid calling __origin_write N times for N snapshots, we start
+ * the copy for the snapshot where the bio was queued last of all.
+ *
+ * Started last, that copy will most likely also finish last, so the
+ * exceptions in the other snapshots will already have completed by
+ * the time the bio is retried.
*/
- if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
- flush_bios(bio_list_get(&primary_pe->origin_bios));
- free_pending_exception(primary_pe);
- /* If we got here, pe_queue is necessarily empty. */
- return r;
- }
-
- /*
- * Now that we have a complete pe list we can start the copying.
- */
- list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
- start_copy(pe);
+ if (pe_to_start)
+ start_copy(pe_to_start);
return r;
}
@@ -1590,7 +1539,7 @@ static int do_origin(struct dm_dev *origin, struct bio *bio)
down_read(&_origins_lock);
o = __lookup_origin(origin->bdev);
if (o)
- r = __origin_write(&o->snapshots, bio);
+ r = __origin_write(&o->snapshots, bio->bi_sector, bio);
up_read(&_origins_lock);
return r;
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
prepare_merge: returns the last chunk via the variables passed by
reference. The return value is the number of consecutive chunks ending
at that chunk.
commit_merge: permanently removes 'n' chunks from the exception store.
'n' must be less than or equal to the number returned by prepare_merge.
If the caller wishes, it can merge several consecutive chunks at once
as an optimization. If it doesn't want to do this optimization, it just
calls commit_merge with n == 1.
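A minimal sketch of the intended calling convention, in C with
simplified types and a hypothetical 'store_ops' struct (the real
interface is dm_exception_store_type):

  typedef unsigned long long chunk_t;

  struct store_ops {
          int (*prepare_merge)(void *store, chunk_t *old_chunk,
                               chunk_t *new_chunk);
          int (*commit_merge)(void *store, int n);
  };

  /* Merge one batch; returns < 0 on error, 0 once the store is empty. */
  static int merge_one_batch(const struct store_ops *ops, void *store)
  {
          chunk_t old_chunk, new_chunk;
          int n = ops->prepare_merge(store, &old_chunk, &new_chunk);

          if (n <= 0)
                  return n;
          /*
           * Copy chunk new_chunk back to old_chunk, new_chunk - 1 back
           * to old_chunk - 1, and so on, for up to n chunks; then
           * commit however many were actually copied (here: all n).
           */
          return ops->commit_merge(store, n);
  }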
Signed-off-by: Mikulas Patocka <[email protected]>
Reviewed-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-exception-store.h | 16 +++++++++
drivers/md/dm-snap-persistent.c | 70 +++++++++++++++++++++++++++++++++++++++
2 files changed, 86 insertions(+), 0 deletions(-)
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index bb88746..534427f 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -75,6 +75,22 @@ struct dm_exception_store_type {
void *callback_context);
/*
+ * Returns the last mergeable chunk via *old_chunk and *new_chunk.
+ * > 0: the number of consecutive chunks that can
+ * be copied in one shot.
+ * == 0: the exception store is empty.
+ * < 0: error.
+ */
+ int (*prepare_merge) (struct dm_exception_store *store,
+ chunk_t *old_chunk, chunk_t *new_chunk);
+
+ /*
+ * Clear the last n exceptions.
+ * n must be <= the value returned by prepare_merge.
+ */
+ int (*commit_merge) (struct dm_exception_store *store, int n);
+
+ /*
* The snapshot is invalid, note this in the metadata.
*/
void (*drop_snapshot) (struct dm_exception_store *store);
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 157999e..1f5752e 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -409,6 +409,15 @@ static void write_exception(struct pstore *ps,
e->new_chunk = cpu_to_le64(de->new_chunk);
}
+static void clear_exception(struct pstore *ps, uint32_t index)
+{
+ struct disk_exception *e = get_exception(ps, index);
+
+ /* clear it */
+ e->old_chunk = 0;
+ e->new_chunk = 0;
+}
+
/*
* Registers the exceptions that are present in the current area.
* 'full' is filled in to indicate if the area has been
@@ -680,6 +689,63 @@ static void persistent_commit_exception(struct dm_exception_store *store,
ps->callback_count = 0;
}
+static int persistent_prepare_merge(struct dm_exception_store *store,
+ chunk_t *old_chunk, chunk_t *new_chunk)
+{
+ int r, i;
+ struct pstore *ps = get_info(store);
+ struct disk_exception de;
+
+ if (!ps->current_committed) {
+ if (!ps->current_area)
+ return 0;
+ ps->current_area--;
+ r = area_io(ps, READ);
+ if (r < 0)
+ return r;
+ ps->current_committed = ps->exceptions_per_area;
+ }
+
+ read_exception(ps, ps->current_committed - 1, &de);
+ *old_chunk = de.old_chunk;
+ *new_chunk = de.new_chunk;
+
+ for (i = 1; i < ps->current_committed; i++) {
+ read_exception(ps, ps->current_committed - 1 - i, &de);
+ if (de.old_chunk != *old_chunk - i ||
+ de.new_chunk != *new_chunk - i)
+ break;
+ }
+
+ return i;
+}
+
+static int persistent_commit_merge(struct dm_exception_store *store, int n)
+{
+ int r, i;
+ struct pstore *ps = get_info(store);
+
+ BUG_ON(n > ps->current_committed);
+
+ for (i = 0; i < n; i++)
+ clear_exception(ps, ps->current_committed - 1 - i);
+
+ r = area_io(ps, WRITE);
+ if (r < 0)
+ return r;
+
+ ps->current_committed -= i;
+
+ /*
+ * ps->next_free cannot really be reliably decreased here (because of
+ * misordered chunks), so don't do it. We don't even need it, because
+ * there is no situation where a merging snapshot would become
+ * non-merging.
+ */
+
+ return 0;
+}
+
static void persistent_drop_snapshot(struct dm_exception_store *store)
{
struct pstore *ps = get_info(store);
@@ -748,6 +814,8 @@ static struct dm_exception_store_type _persistent_type = {
.read_metadata = persistent_read_metadata,
.prepare_exception = persistent_prepare_exception,
.commit_exception = persistent_commit_exception,
+ .prepare_merge = persistent_prepare_merge,
+ .commit_merge = persistent_commit_merge,
.drop_snapshot = persistent_drop_snapshot,
.usage = persistent_usage,
.status = persistent_status,
@@ -761,6 +829,8 @@ static struct dm_exception_store_type _persistent_compat_type = {
.read_metadata = persistent_read_metadata,
.prepare_exception = persistent_prepare_exception,
.commit_exception = persistent_commit_exception,
+ .prepare_merge = persistent_prepare_merge,
+ .commit_merge = persistent_commit_merge,
.drop_snapshot = persistent_drop_snapshot,
.usage = persistent_usage,
.status = persistent_status,
--
1.6.5.2
Conditionally adjust snapshot usage accounting if snapshot-merge is in
progress. Care is taken to preserve the established kernel<->userspace
interface.
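As a worked illustration (all numbers assumed, and assuming
area_location(ps, area) == 1 + (exceptions_per_area + 1) * area as in
the existing persistent exception store): take a 16-sector chunk, 256
exceptions per area, and a merge that has worked back to
ps->current_area == 2 with ps->current_committed == 100. Then:

  area_location(ps, 2) = 1 + 257 * 2 = 515
  sectors_allocated = (515 - 1 + 100 + 2) * 16 = 9856

The '+ 2' keeps the header and first metadata chunk in the count, so
userspace sees usage fall steadily as chunks are merged away while all
metadata chunks remain accounted for.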
Signed-off-by: Mike Snitzer <[email protected]>
Cc: Mikulas Patocka <[email protected]>
---
drivers/md/dm-snap-persistent.c | 21 ++++++++++++++++++++-
1 files changed, 20 insertions(+), 1 deletions(-)
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index 1f5752e..3cb609b 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -91,6 +91,7 @@ struct pstore {
struct dm_exception_store *store;
int version;
int valid;
+ int merging; /* 1 if there is merging going on */
uint32_t exceptions_per_area;
/*
@@ -506,7 +507,20 @@ static void persistent_usage(struct dm_exception_store *store,
{
struct pstore *ps = get_info(store);
- *sectors_allocated = ps->next_free * store->chunk_size;
+ /*
+ * Must maintain the fact that DM reports all metadata chunks
+ * in 'sectors_allocated'
+ * - preserves the established kernel<->userspace interface
+ * - snapshot-merge must account for the first two metadata
+ * chunks in its 'sectors_allocated'
+ */
+ if (!ps->merging) {
+ *sectors_allocated = ps->next_free * store->chunk_size;
+ } else {
+ *sectors_allocated =
+ (area_location(ps, ps->current_area) - 1 +
+ ps->current_committed + 2) * store->chunk_size;
+ }
*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);
/*
@@ -608,6 +622,8 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
chunk_t next_free;
sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
+ ps->merging = 0;
+
/* Is there enough room ? */
if (size < ((ps->next_free + 1) * store->chunk_size))
return -ENOSPC;
@@ -696,6 +712,8 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
struct pstore *ps = get_info(store);
struct disk_exception de;
+ ps->merging = 1;
+
if (!ps->current_committed) {
if (!ps->current_area)
return 0;
@@ -767,6 +785,7 @@ static int persistent_ctr(struct dm_exception_store *store,
ps->store = store;
ps->valid = 1;
+ ps->merging = 0;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
ps->zero_area = NULL;
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
The snapshot-merge target allows a snapshot to be merged back into the
snapshot's origin device.
One expected use of snapshot merging is the rollback of a root
filesystem after system upgrades (e.g. 'yum update'). snapshot-merge
enables "system rollback" support for any filesystem that is using the
associated DM devices.
Add snapshot-merge target management to both dm_snapshot_init() and
dm_snapshot_exit(). As an initial placeholder, snapshot-merge is
identical to the snapshot target.
Add "snapshot-merge" to Documentation/device-mapper/snapshot.txt
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
Documentation/device-mapper/snapshot.txt | 52 +++++++++++++++++++++++++++---
drivers/md/dm-snap.c | 48 ++++++++++++++++++++-------
2 files changed, 83 insertions(+), 17 deletions(-)
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
index a5009c8..ec6c883 100644
--- a/Documentation/device-mapper/snapshot.txt
+++ b/Documentation/device-mapper/snapshot.txt
@@ -8,13 +8,19 @@ the block device which are also writable without interfering with the
original content;
*) To create device "forks", i.e. multiple different versions of the
same data stream.
+*) To merge a snapshot of a block device back into the snapshot's origin
+device.
+In the first two cases, dm copies only the chunks of data that get
+changed and uses a separate copy-on-write (COW) block device for
+storage.
-In both cases, dm copies only the chunks of data that get changed and
-uses a separate copy-on-write (COW) block device for storage.
+For snapshot merge the contents of the COW storage are merged back into
+the origin device.
-There are two dm targets available: snapshot and snapshot-origin.
+There are three dm targets available:
+snapshot, snapshot-origin, and snapshot-merge.
*) snapshot-origin <origin>
@@ -40,8 +46,17 @@ The difference is that for transient snapshots less metadata must be
saved on disk - they can be kept in memory by the kernel.
-How this is used by LVM2
-========================
+* snapshot-merge <origin> <COW device> <persistent> <chunksize>
+
+takes the same table arguments as the snapshot target except it only
+works with persistent snapshots. Creates a merging snapshot that takes
+control of the changed chunks stored in the <COW device> of an existing
+snapshot, through a handover procedure, and merges these chunks back
+into the <origin>.
+
+
+How snapshot is used by LVM2
+============================
When you create the first LVM2 snapshot of a volume, four dm devices are used:
1) a device containing the original mapping table of the source volume;
@@ -72,3 +87,30 @@ brw------- 1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
brw------- 1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
brw------- 1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
+
+How snapshot-merge is used by LVM2
+==================================
+A merging snapshot assumes the role of the "snapshot-origin" while
+merging. As such the "snapshot-origin" is replaced with
+"snapshot-merge". The "-real" device is not changed and the "-cow"
+device is renamed to aid LVM2's cleanup of the merging snapshot after it
+completes. The "snapshot" that hands over its COW device to the
+"snapshot-merge" is generally deactivated; but if it is left active it
+will simply return I/O errors.
+
+A snapshot will merge into its origin with the following command:
+
+lvconvert --merge volumeGroup/snap
+
+we'll now have this situation:
+
+# dmsetup table|grep volumeGroup
+
+volumeGroup-base-real: 0 2097152 linear 8:19 384
+volumeGroup-base-cow: 0 204800 linear 8:19 2097536
+volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
+
+# ls -lL /dev/mapper/volumeGroup-*
+brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+brw------- 1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
+brw------- 1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ad95039..e8a1107 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1664,6 +1664,21 @@ static struct target_type snapshot_target = {
.iterate_devices = snapshot_iterate_devices,
};
+static struct target_type merge_target = {
+ .name = "snapshot-merge",
+ .version = {1, 9, 0},
+ .module = THIS_MODULE,
+ .ctr = snapshot_ctr,
+ .dtr = snapshot_dtr,
+ .map = snapshot_map,
+ .end_io = snapshot_end_io,
+ .postsuspend = snapshot_postsuspend,
+ .preresume = snapshot_preresume,
+ .resume = snapshot_resume,
+ .status = snapshot_status,
+ .iterate_devices = snapshot_iterate_devices,
+};
+
static int __init dm_snapshot_init(void)
{
int r;
@@ -1675,7 +1690,7 @@ static int __init dm_snapshot_init(void)
}
r = dm_register_target(&snapshot_target);
- if (r) {
+ if (r < 0) {
DMERR("snapshot target register failed %d", r);
goto bad_register_snapshot_target;
}
@@ -1683,34 +1698,40 @@ static int __init dm_snapshot_init(void)
r = dm_register_target(&origin_target);
if (r < 0) {
DMERR("Origin target register failed %d", r);
- goto bad1;
+ goto bad_register_origin_target;
+ }
+
+ r = dm_register_target(&merge_target);
+ if (r < 0) {
+ DMERR("Merge target register failed %d", r);
+ goto bad_register_merge_target;
}
r = init_origin_hash();
if (r) {
DMERR("init_origin_hash failed.");
- goto bad2;
+ goto bad_origin_hash;
}
exception_cache = KMEM_CACHE(dm_exception, 0);
if (!exception_cache) {
DMERR("Couldn't create exception cache.");
r = -ENOMEM;
- goto bad3;
+ goto bad_exception_cache;
}
pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
if (!pending_cache) {
DMERR("Couldn't create pending cache.");
r = -ENOMEM;
- goto bad4;
+ goto bad_pending_cache;
}
tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
if (!tracked_chunk_cache) {
DMERR("Couldn't create cache to track chunks in use.");
r = -ENOMEM;
- goto bad5;
+ goto bad_tracked_chunk_cache;
}
ksnapd = create_singlethread_workqueue("ksnapd");
@@ -1724,19 +1745,21 @@ static int __init dm_snapshot_init(void)
bad_pending_pool:
kmem_cache_destroy(tracked_chunk_cache);
-bad5:
+bad_tracked_chunk_cache:
kmem_cache_destroy(pending_cache);
-bad4:
+bad_pending_cache:
kmem_cache_destroy(exception_cache);
-bad3:
+bad_exception_cache:
exit_origin_hash();
-bad2:
+bad_origin_hash:
+ dm_unregister_target(&merge_target);
+bad_register_merge_target:
dm_unregister_target(&origin_target);
-bad1:
+bad_register_origin_target:
dm_unregister_target(&snapshot_target);
-
bad_register_snapshot_target:
dm_exception_store_exit();
+
return r;
}
@@ -1746,6 +1769,7 @@ static void __exit dm_snapshot_exit(void)
dm_unregister_target(&snapshot_target);
dm_unregister_target(&origin_target);
+ dm_unregister_target(&merge_target);
exit_origin_hash();
kmem_cache_destroy(pending_cache);
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
The snapshot-merge target should not allocate new exceptions because the
intent is to merge all of its exceptions as quickly and safely as
possible.
Introduce new method, snapshot_merge_map(), that won't allocate
exceptions. Modify __origin_write() so that it doesn't allocate
exceptions in merging snapshots.
If a write request to a merging snapshot device is to be dispatched
directly to the origin (because the chunk is not remapped or was already
merged), snapshot_merge_map() must make exceptions in other snapshots.
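The resulting policy can be summarised with a small standalone sketch
(simplified to two flags; the real code operates on bios under s->lock):

  enum map_target { COW_DEVICE, ORIGIN, ORIGIN_VIA_DO_ORIGIN };

  /*
   * chunk_remapped: the chunk still has an exception in the merging
   * snapshot (not merged back yet); is_write: the bio is a write.
   */
  static enum map_target merge_map_decision(int chunk_remapped, int is_write)
  {
          if (chunk_remapped)
                  return COW_DEVICE;           /* use the exception */
          if (is_write)
                  /* do_origin() may create exceptions in other snapshots */
                  return ORIGIN_VIA_DO_ORIGIN;
          return ORIGIN;                       /* plain origin read */
  }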
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 48 insertions(+), 1 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e8a1107..b8838e7 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -127,6 +127,11 @@ static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
return lhs == rhs;
}
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context);
+
+#define is_merge(ti) ((ti)->type->map == snapshot_merge_map)
+
struct dm_snap_pending_exception {
struct dm_exception e;
@@ -1292,6 +1297,44 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
return r;
}
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ struct dm_exception *e;
+ struct dm_snapshot *s = ti->private;
+ int r = DM_MAPIO_REMAPPED;
+ chunk_t chunk;
+
+ chunk = sector_to_chunk(s->store, bio->bi_sector);
+
+ down_read(&s->lock);
+
+ /* Full snapshots are not usable */
+ if (!s->valid) {
+ r = -EIO;
+ goto out_unlock;
+ }
+
+ /* If the block is already remapped - use that */
+ e = dm_lookup_exception(&s->complete, chunk);
+ if (e) {
+ remap_exception(s, e, bio, chunk);
+ goto out_unlock;
+ }
+
+ bio->bi_bdev = s->origin->bdev;
+
+ if (bio_rw(bio) == WRITE) {
+ up_write(&s->lock);
+ return do_origin(s->origin, bio);
+ }
+
+ out_unlock:
+ up_read(&s->lock);
+
+ return r;
+}
+
static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
int error, union map_info *map_context)
{
@@ -1442,6 +1485,10 @@ static int __origin_write(struct list_head *snapshots,
/* Do all the snapshots on this origin */
list_for_each_entry (snap, snapshots, list) {
+ /* Don't make new exceptions in a merging snapshot */
+ if (is_merge(snap->ti))
+ continue;
+
down_write(&snap->lock);
/* Only deal with valid and active snapshots */
@@ -1670,7 +1717,7 @@ static struct target_type merge_target = {
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
- .map = snapshot_map,
+ .map = snapshot_merge_map,
.end_io = snapshot_end_io,
.postsuspend = snapshot_postsuspend,
.preresume = snapshot_preresume,
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
Merging more than one snapshot at a time is not supported.
__find_merging_snapshot() finds the merging snapshot, if any, for a
given origin device.
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 35 +++++++++++++++++++++++++++++++++--
1 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index b8838e7..f4e9aa6 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -332,9 +332,29 @@ out:
return count;
}
+static struct dm_snapshot *__find_merging_snapshot(struct block_device *origin)
+{
+ struct dm_snapshot *s, *merging_snap = NULL;
+ struct origin *o;
+
+ o = __lookup_origin(origin);
+ if (!o)
+ return NULL;
+
+ list_for_each_entry(s, &o->snapshots, list) {
+ if (is_merge(s->ti)) {
+ merging_snap = s;
+ break;
+ }
+ }
+
+ return merging_snap;
+}
+
static int __validate_exception_handover(struct dm_snapshot *snap)
{
struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+ struct block_device *bdev = snap->origin->bdev;
int r = 0;
/* Does snapshot need exceptions handed over to it? */
@@ -346,8 +366,19 @@ static int __validate_exception_handover(struct dm_snapshot *snap)
goto out;
}
- if (snap_src)
+ if (snap_src) {
+ if (is_merge(snap->ti)) {
+ /* Do not allow more than one merging snapshot */
+ if (__find_merging_snapshot(bdev)) {
+ snap->ti->error = "A snapshot is already "
+ "merging.";
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
r = 1;
+ }
out:
return r;
@@ -1325,7 +1356,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
bio->bi_bdev = s->origin->bdev;
if (bio_rw(bio) == WRITE) {
- up_write(&s->lock);
+ up_read(&s->lock);
return do_origin(s->origin, bio);
}
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
Merging is started when the origin is resumed and stopped when the
origin is suspended or the merging snapshot is destroyed.
We don't need a separate thread; kcopyd does the job just fine
(provided that we have a private kcopyd).
Merging is not yet interlocked with writes, so there is a race with
concurrent access. It is fixed by later patches in this series.
Adds a supporting function to decrement consecutive chunk counter.
Care is taken to increment the exception's old_chunk and new_chunk,
prior to the dm_consecutive_chunk_count_dec() call, if the chunk is at
the start of an exception's consecutive chunk range. This allows for
snapshot-merge to support chunks that are added to the 'complete'
exception hash table before existing chunks.
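A worked example (chunk numbers assumed): let an exception map old
chunks 10..13 to new chunks 20..23, i.e. e->old_chunk == 10 with a
consecutive count of 3. Merging chunk 13 (the end of the range) only
decrements the count, leaving 10..12 -> 20..22. Merging chunk 10 (the
start) must first advance e->old_chunk and e->new_chunk to 11 and 21
before the decrement, leaving 11..13 -> 21..23; without that increment
the shrunken range would wrongly still claim to start at chunk 10.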
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-exception-store.h | 11 +++
drivers/md/dm-snap.c | 179 +++++++++++++++++++++++++++++++++++++--
2 files changed, 184 insertions(+), 6 deletions(-)
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 534427f..7b83002 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -153,6 +153,13 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e)
BUG_ON(!dm_consecutive_chunk_count(e));
}
+static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e)
+{
+ BUG_ON(!dm_consecutive_chunk_count(e));
+
+ e->new_chunk -= (1ULL << DM_CHUNK_NUMBER_BITS);
+}
+
# else
# define DM_CHUNK_CONSECUTIVE_BITS 0
@@ -170,6 +177,10 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e)
{
}
+static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e)
+{
+}
+
# endif
/*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f4e9aa6..87c9033 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -101,6 +101,13 @@ struct dm_snapshot {
mempool_t *tracked_chunk_pool;
spinlock_t tracked_chunk_lock;
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+
+ /* Merge operation is in progress */
+ int merge_running;
+
+ /* Set to 1 to request that merging shut down */
+ /* Cleared back to 0 when the merging is stopped */
+ int merge_shutdown;
};
struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
@@ -375,6 +382,14 @@ static int __validate_exception_handover(struct dm_snapshot *snap)
r = -EINVAL;
goto out;
}
+
+ if (!snap_src->store->type->prepare_merge ||
+ !snap_src->store->type->commit_merge) {
+ snap->ti->error = "Merging snapshot store must "
+ "support snapshot-merge";
+ r = -EINVAL;
+ goto out;
+ }
}
r = 1;
@@ -714,6 +729,123 @@ static int init_hash_tables(struct dm_snapshot *s)
return 0;
}
+static void merge_callback(int read_err, unsigned long write_err,
+ void *context);
+
+static void snapshot_merge_process(struct dm_snapshot *s)
+{
+ int r;
+ chunk_t old_chunk, new_chunk;
+ struct dm_exception *e;
+ struct dm_io_region src, dest;
+
+ BUG_ON(!s->merge_running);
+ if (s->merge_shutdown)
+ goto shut;
+
+ if (!s->valid) {
+ DMERR("snapshot is invalid, can't merge");
+ goto shut;
+ }
+
+ r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk);
+ if (r <= 0) {
+ if (r < 0)
+ DMERR("Read error in exception store, "
+ "shutting down merge");
+ goto shut;
+ }
+
+ /* TODO: use larger I/O size once we verify that kcopyd handles it */
+
+ /* !!! FIXME: interlock writes to this chunk */
+ down_write(&s->lock);
+ e = dm_lookup_exception(&s->complete, old_chunk);
+ if (!e) {
+ DMERR("exception for block %llu is on disk but not in memory",
+ (unsigned long long)old_chunk);
+ up_write(&s->lock);
+ goto shut;
+ }
+ if (dm_consecutive_chunk_count(e)) {
+ if (old_chunk == e->old_chunk) {
+ e->old_chunk++;
+ e->new_chunk++;
+ } else if (old_chunk != e->old_chunk +
+ dm_consecutive_chunk_count(e)) {
+ DMERR("merge from the middle of a chunk range");
+ up_write(&s->lock);
+ goto shut;
+ }
+ dm_consecutive_chunk_count_dec(e);
+ } else {
+ dm_remove_exception(e);
+ free_completed_exception(e);
+ }
+ up_write(&s->lock);
+
+ dest.bdev = s->origin->bdev;
+ dest.sector = chunk_to_sector(s->store, old_chunk);
+ dest.count = min((sector_t)s->store->chunk_size,
+ get_dev_size(dest.bdev) - dest.sector);
+
+ src.bdev = s->cow->bdev;
+ src.sector = chunk_to_sector(s->store, new_chunk);
+ src.count = dest.count;
+
+ dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
+ return;
+
+shut:
+ s->merge_running = 0;
+}
+
+static void merge_callback(int read_err, unsigned long write_err, void *context)
+{
+ int r;
+ struct dm_snapshot *s = context;
+
+ if (read_err || write_err) {
+ if (read_err)
+ DMERR("Read error in data, shutting down merge");
+ else
+ DMERR("Write error in data, shutting down merge");
+ goto shut;
+ }
+
+ r = s->store->type->commit_merge(s->store, 1);
+ if (r < 0) {
+ DMERR("Write error in exception store, shutting down merge");
+ goto shut;
+ }
+
+ snapshot_merge_process(s);
+ return;
+
+shut:
+ s->merge_running = 0;
+}
+
+static void start_merge(struct dm_snapshot *s)
+{
+ if (!s->merge_running && !s->merge_shutdown) {
+ s->merge_running = 1;
+ snapshot_merge_process(s);
+ }
+}
+
+/*
+ * Stop the merging process and wait until it has stopped.
+ */
+static void stop_merge(struct dm_snapshot *s)
+{
+ while (s->merge_running) {
+ s->merge_shutdown = 1;
+ msleep(1);
+ }
+ s->merge_shutdown = 0;
+}
+
/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
*/
@@ -778,6 +910,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
init_rwsem(&s->lock);
INIT_LIST_HEAD(&s->list);
spin_lock_init(&s->pe_lock);
+ s->merge_running = 0;
+ s->merge_shutdown = 0;
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
@@ -950,6 +1084,9 @@ static void snapshot_dtr(struct dm_target *ti)
}
up_read(&_origins_lock);
+ if (is_merge(ti))
+ stop_merge(s);
+
/* Prevent further origin writes from using this snapshot. */
/* After this returns there can be no new kcopyd jobs. */
unregister_snapshot(s);
@@ -1378,6 +1515,13 @@ static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
return 0;
}
+static void snapshot_merge_presuspend(struct dm_target *ti)
+{
+ struct dm_snapshot *s = ti->private;
+
+ stop_merge(s);
+}
+
static void snapshot_postsuspend(struct dm_target *ti)
{
struct dm_snapshot *s = ti->private;
@@ -1438,6 +1582,32 @@ static void snapshot_resume(struct dm_target *ti)
up_write(&s->lock);
}
+static chunk_t get_origin_minimum_chunksize(struct block_device *bdev)
+{
+ chunk_t min_chunksize;
+
+ down_read(&_origins_lock);
+
+ min_chunksize = __minimum_chunk_size(__lookup_origin(bdev));
+
+ up_read(&_origins_lock);
+
+ return min_chunksize;
+}
+
+static void snapshot_merge_resume(struct dm_target *ti)
+{
+ struct dm_snapshot *s = ti->private;
+
+ snapshot_resume(ti);
+ /*
+ * snapshot-merge can take on the role of the origin too
+ * - must adjust snapshot-merge's ti->split_io accordingly
+ */
+ ti->split_io = get_origin_minimum_chunksize(s->origin->bdev);
+ start_merge(s);
+}
+
static int snapshot_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
{
@@ -1682,11 +1852,7 @@ static void origin_resume(struct dm_target *ti)
{
struct dm_dev *dev = ti->private;
- down_read(&_origins_lock);
-
- ti->split_io = __minimum_chunk_size(__lookup_origin(dev->bdev));
-
- up_read(&_origins_lock);
+ ti->split_io = get_origin_minimum_chunksize(dev->bdev);
}
static int origin_status(struct dm_target *ti, status_type_t type, char *result,
@@ -1750,9 +1916,10 @@ static struct target_type merge_target = {
.dtr = snapshot_dtr,
.map = snapshot_merge_map,
.end_io = snapshot_end_io,
+ .presuspend = snapshot_merge_presuspend,
.postsuspend = snapshot_postsuspend,
.preresume = snapshot_preresume,
- .resume = snapshot_resume,
+ .resume = snapshot_merge_resume,
.status = snapshot_status,
.iterate_devices = snapshot_iterate_devices,
};
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
Use new variables, 'merge_write_interlock' and 'merge_write_interlock_n',
to record the first chunk number (on the origin device) and the number
of chunks that are being merged. Writes to this area are held back on
the 'merge_write_list' queue.
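A self-contained sketch of the interlock test (types simplified; in the
patch the check is done under s->lock in snapshot_merge_map()):

  typedef unsigned long long chunk_t;

  struct merge_window {
          chunk_t start;  /* s->merge_write_interlock */
          int n;          /* s->merge_write_interlock_n */
  };

  /*
   * A write to 'chunk' must be queued on merge_write_list while the
   * merge is copying that chunk back to the origin.
   */
  static int write_must_wait(const struct merge_window *w, chunk_t chunk)
  {
          return chunk >= w->start && chunk < w->start + (chunk_t)w->n;
  }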
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 120 ++++++++++++++++++++++++++++++++++++-------------
1 files changed, 88 insertions(+), 32 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 87c9033..e1ec4c4 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -108,6 +108,16 @@ struct dm_snapshot {
/* It is requested to shut down merging */
/* Cleared back to 0 when the merging is stopped */
int merge_shutdown;
+
+ /* Merging this area --- block any writes */
+ chunk_t merge_write_interlock;
+ int merge_write_interlock_n;
+
+ /*
+ * A list of requests that were delayed because
+ * of racing with merge
+ */
+ struct bio_list merge_write_list;
};
struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
@@ -729,6 +739,9 @@ static int init_hash_tables(struct dm_snapshot *s)
return 0;
}
+static void flush_bios(struct bio *bio);
+static void error_bios(struct bio *bio);
+
static void merge_callback(int read_err, unsigned long write_err,
void *context);
@@ -736,7 +749,6 @@ static void snapshot_merge_process(struct dm_snapshot *s)
{
int r;
chunk_t old_chunk, new_chunk;
- struct dm_exception *e;
struct dm_io_region src, dest;
BUG_ON(!s->merge_running);
@@ -758,32 +770,6 @@ static void snapshot_merge_process(struct dm_snapshot *s)
/* TODO: use larger I/O size once we verify that kcopyd handles it */
- /* !!! FIXME: intelock writes to this chunk */
- down_write(&s->lock);
- e = dm_lookup_exception(&s->complete, old_chunk);
- if (!e) {
- DMERR("exception for block %llu is on disk but not in memory",
- (unsigned long long)old_chunk);
- up_write(&s->lock);
- goto shut;
- }
- if (dm_consecutive_chunk_count(e)) {
- if (old_chunk == e->old_chunk) {
- e->old_chunk++;
- e->new_chunk++;
- } else if (old_chunk != e->old_chunk +
- dm_consecutive_chunk_count(e)) {
- DMERR("merge from the middle of a chunk range");
- up_write(&s->lock);
- goto shut;
- }
- dm_consecutive_chunk_count_dec(e);
- } else {
- dm_remove_exception(e);
- free_completed_exception(e);
- }
- up_write(&s->lock);
-
dest.bdev = s->origin->bdev;
dest.sector = chunk_to_sector(s->store, old_chunk);
dest.count = min((sector_t)s->store->chunk_size,
@@ -793,6 +779,13 @@ static void snapshot_merge_process(struct dm_snapshot *s)
src.sector = chunk_to_sector(s->store, new_chunk);
src.count = dest.count;
+ down_write(&s->lock);
+ s->merge_write_interlock = old_chunk;
+ s->merge_write_interlock_n = 1;
+ up_write(&s->lock);
+
+ /* !!! FIXME: wait until writes to this chunk drain */
+
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
return;
@@ -800,10 +793,25 @@ shut:
s->merge_running = 0;
}
+/* This function drops s->lock */
+static inline void release_write_interlock(struct dm_snapshot *s, int err)
+{
+ struct bio *b;
+ s->merge_write_interlock = 0;
+ s->merge_write_interlock_n = 0;
+ b = bio_list_get(&s->merge_write_list);
+ up_write(&s->lock);
+ if (!err)
+ flush_bios(b);
+ else
+ error_bios(b);
+}
+
static void merge_callback(int read_err, unsigned long write_err, void *context)
{
- int r;
+ int r, i;
struct dm_snapshot *s = context;
+ struct dm_exception *e;
if (read_err || write_err) {
if (read_err)
@@ -813,16 +821,51 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
goto shut;
}
- r = s->store->type->commit_merge(s->store, 1);
+ r = s->store->type->commit_merge(s->store, s->merge_write_interlock_n);
if (r < 0) {
DMERR("Write error in exception store, shutting down merge");
goto shut;
}
+ down_write(&s->lock);
+ /*
+ * Must process chunks (and associated exceptions) in reverse
+ * so that dm_consecutive_chunk_count_dec() accounting works
+ */
+ for (i = s->merge_write_interlock_n - 1; i >= 0; i--) {
+ chunk_t old_chunk = s->merge_write_interlock + i;
+ e = dm_lookup_exception(&s->complete, old_chunk);
+ if (!e) {
+ DMERR("exception for block %llu is on "
+ "disk but not in memory",
+ (unsigned long long)old_chunk);
+ up_write(&s->lock);
+ goto shut;
+ }
+ if (dm_consecutive_chunk_count(e)) {
+ if (old_chunk == e->old_chunk) {
+ e->old_chunk++;
+ e->new_chunk++;
+ } else if (old_chunk != e->old_chunk +
+ dm_consecutive_chunk_count(e)) {
+ DMERR("merge from the middle of a chunk range");
+ up_write(&s->lock);
+ goto shut;
+ }
+ dm_consecutive_chunk_count_dec(e);
+ } else {
+ dm_remove_exception(e);
+ free_completed_exception(e);
+ }
+ }
+ release_write_interlock(s, 0);
+
snapshot_merge_process(s);
return;
shut:
+ down_write(&s->lock);
+ release_write_interlock(s, 1);
s->merge_running = 0;
}
@@ -912,6 +955,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&s->pe_lock);
s->merge_running = 0;
s->merge_shutdown = 0;
+ s->merge_write_interlock = 0;
+ s->merge_write_interlock_n = 0;
+ bio_list_init(&s->merge_write_list);
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
@@ -1475,7 +1521,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
chunk = sector_to_chunk(s->store, bio->bi_sector);
- down_read(&s->lock);
+ down_write(&s->lock);
/* Full snapshots are not usable */
if (!s->valid) {
@@ -1486,6 +1532,16 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
/* If the block is already remapped - use that */
e = dm_lookup_exception(&s->complete, chunk);
if (e) {
+ /* We are copying this area --- so don't write to it */
+ if (bio_rw(bio) == WRITE &&
+ chunk >= s->merge_write_interlock &&
+ chunk < (s->merge_write_interlock +
+ s->merge_write_interlock_n)) {
+ bio->bi_bdev = s->origin->bdev;
+ bio_list_add(&s->merge_write_list, bio);
+ r = DM_MAPIO_SUBMITTED;
+ goto out_unlock;
+ }
remap_exception(s, e, bio, chunk);
goto out_unlock;
}
@@ -1493,12 +1549,12 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
bio->bi_bdev = s->origin->bdev;
if (bio_rw(bio) == WRITE) {
- up_read(&s->lock);
+ up_write(&s->lock);
return do_origin(s->origin, bio);
}
out_unlock:
- up_read(&s->lock);
+ up_write(&s->lock);
return r;
}
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
Track the merging snapshot device's in-progress writes to chunks that
were already remapped, and delay merging a chunk until all writes to
that chunk finish.
Signed-off-by: Mikulas Patocka <[email protected]>
Reviewed-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 7 ++++++-
1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e1ec4c4..e41be70 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -784,7 +784,8 @@ static void snapshot_merge_process(struct dm_snapshot *s)
s->merge_write_interlock_n = 1;
up_write(&s->lock);
- /* !!! FIXME: wait until writes to this chunk drain */
+ while (__chunk_is_tracked(s, old_chunk))
+ msleep(1);
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
return;
@@ -1542,7 +1543,11 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
r = DM_MAPIO_SUBMITTED;
goto out_unlock;
}
+
remap_exception(s, e, bio, chunk);
+
+ if (bio_rw(bio) == WRITE)
+ map_context->ptr = track_chunk(s, chunk);
goto out_unlock;
}
--
1.6.5.2
From: Mikulas Patocka <[email protected]>
When there is one merging snapshot and other non-merging snapshots,
snapshot_merge_process() must make exceptions in the non-merging
snapshots.
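For example (devices hypothetical): with origin O, merging snapshot M
and ordinary snapshot S, copying a chunk from M's COW back to O is,
from S's point of view, just another write to the origin. S must
therefore be given its own copy of the old origin data first, or the
merge would silently corrupt S. snapshot_merge_process() does this by
calling __origin_write() with a NULL bio for the region about to be
copied, and waits until those exceptions have completed before starting
the kcopyd copy.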
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 32 +++++++++++++++++++++++++++++++-
1 files changed, 31 insertions(+), 1 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e41be70..2b5b083 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -255,6 +255,8 @@ struct origin {
static struct list_head *_origins;
static struct rw_semaphore _origins_lock;
+static DECLARE_WAIT_QUEUE_HEAD(_pending_exception_done);
+
static int init_origin_hash(void)
{
int i;
@@ -742,13 +744,18 @@ static int init_hash_tables(struct dm_snapshot *s)
static void flush_bios(struct bio *bio);
static void error_bios(struct bio *bio);
+static int __origin_write(struct list_head *snapshots,
+ sector_t sector, struct bio *bio);
+
static void merge_callback(int read_err, unsigned long write_err,
void *context);
static void snapshot_merge_process(struct dm_snapshot *s)
{
int r;
- chunk_t old_chunk, new_chunk;
+ chunk_t old_chunk, new_chunk, n;
+ struct origin *o;
+ int must_wait;
struct dm_io_region src, dest;
BUG_ON(!s->merge_running);
@@ -779,6 +786,27 @@ static void snapshot_merge_process(struct dm_snapshot *s)
src.sector = chunk_to_sector(s->store, new_chunk);
src.count = dest.count;
+test_again:
+ /* Reallocate other snapshots */
+ must_wait = 0;
+ /*
+ * Merging snapshot already has the origin's __minimum_chunk_size()
+ * stored in split_io (see: snapshot_merge_resume); avoid rediscovery
+ */
+ BUG_ON(!s->ti->split_io);
+ down_read(&_origins_lock);
+ o = __lookup_origin(s->origin->bdev);
+ for (n = 0; n < s->store->chunk_size; n += s->ti->split_io) {
+ r = __origin_write(&o->snapshots, dest.sector + n, NULL);
+ if (r == DM_MAPIO_SUBMITTED)
+ must_wait = 1;
+ }
+ up_read(&_origins_lock);
+ if (must_wait) {
+ sleep_on_timeout(&_pending_exception_done, HZ / 100 + 1);
+ goto test_again;
+ }
+
down_write(&s->lock);
s->merge_write_interlock = old_chunk;
s->merge_write_interlock_n = 1;
@@ -1301,6 +1329,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
origin_bios = bio_list_get(&pe->origin_bios);
free_pending_exception(pe);
+ wake_up_all(&_pending_exception_done);
+
up_write(&s->lock);
/* Submit any pending write bios */
--
1.6.5.2
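The test_again/wake_up_all() handshake above amounts to: re-run the
reallocation pass until no snapshot still has a copy in flight, sleeping
on the wait queue between passes. A compact userspace model of that
loop, with a pthread condition variable standing in for
_pending_exception_done and a stub in place of the real __origin_write()
scan:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  pending_exception_done = PTHREAD_COND_INITIALIZER;

/* Stub for the __origin_write() pass over &o->snapshots: pretend the
 * outstanding copies finish after two retries. */
static int passes;
static bool some_copy_still_in_flight(void)
{
    return ++passes < 3;
}

/* pending_complete() analogue: wake anyone waiting on copies. */
static void exception_completed(void)
{
    pthread_mutex_lock(&lock);
    pthread_cond_broadcast(&pending_exception_done);
    pthread_mutex_unlock(&lock);
}

/* The patch's test_again loop: repeat the pass until it reports no
 * pending copies, sleeping with a ~10ms timeout (cf. HZ / 100 + 1)
 * between attempts. */
static void wait_for_reallocation(void)
{
    pthread_mutex_lock(&lock);
    while (some_copy_still_in_flight()) {
        struct timespec ts;
        clock_gettime(CLOCK_REALTIME, &ts);
        ts.tv_nsec += 10 * 1000 * 1000;
        if (ts.tv_nsec >= 1000000000L) {
            ts.tv_sec++;
            ts.tv_nsec -= 1000000000L;
        }
        pthread_cond_timedwait(&pending_exception_done, &lock, &ts);
    }
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    wait_for_reallocation();
    exception_completed(); /* harmless here; shows the wake-up side */
    printf("all other snapshots reallocated after %d passes\n", passes);
    return 0;
}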
From: Mikulas Patocka <[email protected]>
If we are merging an invalidated snapshot, redirect all accesses to the
origin device. This is safe because snapshot_merge_process() will
stop_merge() if the merging snapshot becomes invalid.
Allowing the origin device to remain functional is important because it
may contain the root filesystem.
Signed-off-by: Mikulas Patocka <[email protected]>
Reviewed-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 9 ++++-----
1 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 2b5b083..74f7f38 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1554,11 +1554,9 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
down_write(&s->lock);
- /* Full snapshots are not usable */
- if (!s->valid) {
- r = -EIO;
- goto out_unlock;
- }
+ /* Full merging snapshots are redirected to the origin */
+ if (!s->valid)
+ goto redirect_to_origin;
/* If the block is already remapped - use that */
e = dm_lookup_exception(&s->complete, chunk);
@@ -1581,6 +1579,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
goto out_unlock;
}
+ redirect_to_origin:
bio->bi_bdev = s->origin->bdev;
if (bio_rw(bio) == WRITE) {
--
1.6.5.2
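The decision table in that last hunk is small enough to state as a pair
of pure functions. The sketch below uses made-up types and return codes,
not the kernel's; it only captures the shape of the change: a plain
snapshot must fail IO once invalidated, while a merging snapshot can
fall through to the origin:

#include <stdio.h>

struct snapshot { int valid; int chunk_remapped; };

enum target { TARGET_SNAPSHOT_COW, TARGET_ORIGIN, TARGET_ERROR };

/* Plain snapshot target: an invalidated snapshot must fail IO... */
static enum target snapshot_map(const struct snapshot *s)
{
    if (!s->valid)
        return TARGET_ERROR;  /* -EIO */
    return s->chunk_remapped ? TARGET_SNAPSHOT_COW : TARGET_ORIGIN;
}

/* ...but a merging snapshot can redirect to the origin instead, because
 * no further merge copies are in flight once it is invalidated. */
static enum target snapshot_merge_map(const struct snapshot *s)
{
    if (!s->valid)
        return TARGET_ORIGIN; /* redirect_to_origin */
    return s->chunk_remapped ? TARGET_SNAPSHOT_COW : TARGET_ORIGIN;
}

int main(void)
{
    struct snapshot invalid = { .valid = 0, .chunk_remapped = 1 };
    printf("plain: %d, merging: %d\n",
           snapshot_map(&invalid), snapshot_merge_map(&invalid));
    return 0;
}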
s->store->type->prepare_merge returns the number of chunks that can be
linearly copied starting from the returned chunk number backward (but
the caller is allowed to copy less, and the caller passes the number of
copied chunks to s->store->type->commit_merge).
I.e. if returned chunk numbers are old_chunk == 10 and new_chunk == 20
and returned value is 3, then chunk 20 can be copied to 10, chunk 19 to
9 and 18 to 8.
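That back-to-front convention is easy to sanity-check with throwaway
arithmetic; the rewind below is exactly the patch's
old_chunk + 1 - linear_chunks adjustment, with the example's values
plugged in:

#include <stdio.h>

int main(void)
{
    /* prepare_merge() reported the *last* chunk of a linear run plus
     * the run's length (values from the example above). */
    unsigned long old_chunk = 10, new_chunk = 20;
    int linear_chunks = 3;

    /* Rewind both to the first chunk of the run, as the patch does. */
    unsigned long first_old = old_chunk + 1 - linear_chunks; /* 8 */
    unsigned long first_new = new_chunk + 1 - linear_chunks; /* 18 */

    for (int i = 0; i < linear_chunks; i++)
        printf("copy cow chunk %lu -> origin chunk %lu\n",
               first_new + i, first_old + i);
    return 0;
}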
s->merge_write_interlock_n has been increased up to the full range of
chunks returned from s->store->type->prepare_merge. Until now kcopyd
was only ever allowed to copy one chunk at a time; as a result
snapshot-merge performance was extremely slow.
Relative to the snapshot target, snapshot-merge's performance is now
comparable.
Also, snapshot_merge_process() needs to delay the merging of _all_
chunks that have in-progress writes; not just the first chunk in the
region that is to be merged.
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 35 +++++++++++++++++++++++------------
1 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 74f7f38..4d6387e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -752,11 +752,12 @@ static void merge_callback(int read_err, unsigned long write_err,
static void snapshot_merge_process(struct dm_snapshot *s)
{
- int r;
+ int r, i, linear_chunks;
chunk_t old_chunk, new_chunk, n;
struct origin *o;
int must_wait;
struct dm_io_region src, dest;
+ sector_t io_size;
BUG_ON(!s->merge_running);
if (s->merge_shutdown)
@@ -767,27 +768,34 @@ static void snapshot_merge_process(struct dm_snapshot *s)
goto shut;
}
- r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk);
- if (r <= 0) {
- if (r < 0)
+ linear_chunks = s->store->type->prepare_merge(s->store,
+ &old_chunk, &new_chunk);
+ if (linear_chunks <= 0) {
+ if (linear_chunks < 0)
DMERR("Read error in exception store, "
"shutting down merge");
goto shut;
}
+ /* Adjust old_chunk and new_chunk to reflect start of linear region */
+ old_chunk = old_chunk + 1 - linear_chunks;
+ new_chunk = new_chunk + 1 - linear_chunks;
- /* TODO: use larger I/O size once we verify that kcopyd handles it */
+ /*
+ * Use one (potentially large) I/O to copy all 'linear_chunks'
+ * from the exception store to the origin
+ */
+ io_size = linear_chunks * s->store->chunk_size;
dest.bdev = s->origin->bdev;
dest.sector = chunk_to_sector(s->store, old_chunk);
- dest.count = min((sector_t)s->store->chunk_size,
- get_dev_size(dest.bdev) - dest.sector);
+ dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
src.bdev = s->cow->bdev;
src.sector = chunk_to_sector(s->store, new_chunk);
src.count = dest.count;
test_again:
- /* Reallocate other snapshots */
+ /* Reallocate other snapshots; must account for all 'linear_chunks' */
must_wait = 0;
/*
* Merging snapshot already has the origin's __minimum_chunk_size()
@@ -796,7 +804,7 @@ test_again:
BUG_ON(!s->ti->split_io);
down_read(&_origins_lock);
o = __lookup_origin(s->origin->bdev);
- for (n = 0; n < s->store->chunk_size; n += s->ti->split_io) {
+ for (n = 0; n < io_size; n += s->ti->split_io) {
r = __origin_write(&o->snapshots, dest.sector + n, NULL);
if (r == DM_MAPIO_SUBMITTED)
must_wait = 1;
@@ -809,11 +817,14 @@ test_again:
down_write(&s->lock);
s->merge_write_interlock = old_chunk;
- s->merge_write_interlock_n = 1;
+ s->merge_write_interlock_n = linear_chunks;
up_write(&s->lock);
- while (__chunk_is_tracked(s, old_chunk))
- msleep(1);
+ /* Wait until writes to all 'linear_chunks' drain */
+ for (i = 0; i < linear_chunks; i++) {
+ while (__chunk_is_tracked(s, old_chunk + i))
+ msleep(1);
+ }
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
return;
--
1.6.5.2
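The single-IO sizing in the patch above is plain arithmetic plus a clamp
at the end of the device. A standalone sketch with made-up sizes
(chunk_size and dev_size here are example values, not defaults):

#include <stdio.h>

typedef unsigned long long sector_t;

static sector_t min_sector(sector_t a, sector_t b) { return a < b ? a : b; }

int main(void)
{
    sector_t chunk_size = 16;              /* sectors per chunk (example) */
    int linear_chunks = 3;
    sector_t dev_size = 1000;              /* origin size in sectors */
    sector_t dest_sector = 8 * chunk_size; /* start of the linear run */

    /* One (potentially large) IO instead of one IO per chunk, clamped
     * so the copy never runs past the end of the origin device. */
    sector_t io_size = (sector_t)linear_chunks * chunk_size;
    sector_t dest_count = min_sector(io_size, dev_size - dest_sector);

    printf("copy %llu sectors in a single kcopyd call\n", dest_count);
    return 0;
}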
On Fri, Nov 20 2009 at 3:27pm -0500,
Mike Snitzer <[email protected]> wrote:
> If a write request to a merging snapshot device is to be dispatched
> directly to the origin (because the chunk is not remapped or was already
> merged), snapshot_merge_map() must make exceptions in other snapshots.
...
> +static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
> + union map_info *map_context)
> +{
> + struct dm_exception *e;
> + struct dm_snapshot *s = ti->private;
> + int r = DM_MAPIO_REMAPPED;
> + chunk_t chunk;
> +
> + chunk = sector_to_chunk(s->store, bio->bi_sector);
> +
> + down_read(&s->lock);
> +
> + /* Full snapshots are not usable */
> + if (!s->valid) {
> + r = -EIO;
> + goto out_unlock;
> + }
> +
> + /* If the block is already remapped - use that */
> + e = dm_lookup_exception(&s->complete, chunk);
> + if (e) {
> + remap_exception(s, e, bio, chunk);
> + goto out_unlock;
> + }
> +
> + bio->bi_bdev = s->origin->bdev;
> +
> + if (bio_rw(bio) == WRITE) {
> + up_write(&s->lock);
> + return do_origin(s->origin, bio);
> + }
OK, I caught this in review but clearly didn't refresh the patch; the
above up_write() should be up_read().
Mike
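The cost of such a mismatch is easy to show with a toy reader/writer
semaphore. This is not the kernel's rwsem, just enough bookkeeping to
make the broken pairing trip an assertion:

#include <assert.h>
#include <stdio.h>

/* Toy rwsem: readers hold a count, a writer holds a flag. The unlock
 * must match the lock or the state is corrupted. */
struct rwsem { int readers; int writer; };

static void down_read(struct rwsem *s)  { assert(!s->writer); s->readers++; }
static void up_read(struct rwsem *s)    { assert(s->readers > 0); s->readers--; }
static void down_write(struct rwsem *s) { assert(!s->writer && !s->readers); s->writer = 1; }
static void up_write(struct rwsem *s)   { assert(s->writer); s->writer = 0; }

int main(void)
{
    struct rwsem lock = { 0, 0 };

    down_read(&lock);
    up_read(&lock);    /* correct pairing, as the reply requests */
    printf("matched read lock/unlock: ok\n");

    down_read(&lock);
    up_write(&lock);   /* the bug: aborts, since no writer holds it */
    return 0;
}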
From: Mikulas Patocka <[email protected]>
The snapshot-merge target should not allocate new exceptions because the
intent is to merge all of its exceptions as quickly and safely as
possible.
Introduce a new map method, snapshot_merge_map(), that won't allocate
exceptions. Modify __origin_write() so that it doesn't allocate
exceptions in merging snapshots.
If a write request to a merging snapshot device is to be dispatched
directly to the origin (because the chunk is not remapped or was already
merged), snapshot_merge_map() must make exceptions in other snapshots.
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 48 insertions(+), 1 deletion(-)
Index: linux-2.6/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-snap.c
+++ linux-2.6/drivers/md/dm-snap.c
@@ -127,6 +127,11 @@ static int bdev_equal(struct block_devic
return lhs == rhs;
}
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context);
+
+#define is_merge(ti) ((ti)->type->map == snapshot_merge_map)
+
struct dm_snap_pending_exception {
struct dm_exception e;
@@ -1292,6 +1297,44 @@ static int snapshot_map(struct dm_target
return r;
}
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ struct dm_exception *e;
+ struct dm_snapshot *s = ti->private;
+ int r = DM_MAPIO_REMAPPED;
+ chunk_t chunk;
+
+ chunk = sector_to_chunk(s->store, bio->bi_sector);
+
+ down_read(&s->lock);
+
+ /* Full snapshots are not usable */
+ if (!s->valid) {
+ r = -EIO;
+ goto out_unlock;
+ }
+
+ /* If the block is already remapped - use that */
+ e = dm_lookup_exception(&s->complete, chunk);
+ if (e) {
+ remap_exception(s, e, bio, chunk);
+ goto out_unlock;
+ }
+
+ bio->bi_bdev = s->origin->bdev;
+
+ if (bio_rw(bio) == WRITE) {
+ up_read(&s->lock);
+ return do_origin(s->origin, bio);
+ }
+
+ out_unlock:
+ up_read(&s->lock);
+
+ return r;
+}
+
static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
int error, union map_info *map_context)
{
@@ -1442,6 +1485,10 @@ static int __origin_write(struct list_he
/* Do all the snapshots on this origin */
list_for_each_entry (snap, snapshots, list) {
+ /* Don't make new exceptions in a merging snapshot */
+ if (is_merge(snap->ti))
+ continue;
+
down_write(&snap->lock);
/* Only deal with valid and active snapshots */
@@ -1670,7 +1717,7 @@ static struct target_type merge_target =
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
- .map = snapshot_map,
+ .map = snapshot_merge_map,
.end_io = snapshot_end_io,
.postsuspend = snapshot_postsuspend,
.preresume = snapshot_preresume,
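The is_merge() test above relies on a target being identifiable by which
map function its target_type carries, so no extra flag is needed in the
snapshot structure. A stripped-down sketch of that technique (the types
here only mimic device-mapper's, they are not its API):

#include <stdio.h>

struct bio;
struct dm_target;

typedef int (*map_fn)(struct dm_target *ti, struct bio *bio);

struct target_type { map_fn map; };
struct dm_target   { struct target_type *type; };

static int snapshot_map(struct dm_target *ti, struct bio *bio)
{
    (void)ti; (void)bio;
    return 0;
}

static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
{
    (void)ti; (void)bio;
    return 0;
}

/* A target is a merge target iff its type dispatches to merge_map. */
#define is_merge(ti) ((ti)->type->map == snapshot_merge_map)

int main(void)
{
    struct target_type snap  = { .map = snapshot_map };
    struct target_type merge = { .map = snapshot_merge_map };
    struct dm_target a = { &snap }, b = { &merge };

    printf("a is_merge=%d, b is_merge=%d\n", is_merge(&a), is_merge(&b));
    return 0;
}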
From: Mikulas Patocka <[email protected]>
Merging more than one snapshot of the same origin at once is not
supported. __find_merging_snapshot() finds the merging snapshot, if
any, for a given origin device.
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-snap.c | 33 ++++++++++++++++++++++++++++++++-
1 file changed, 32 insertions(+), 1 deletion(-)
Index: linux-2.6/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-snap.c
+++ linux-2.6/drivers/md/dm-snap.c
@@ -332,9 +332,29 @@ out:
return count;
}
+static struct dm_snapshot *__find_merging_snapshot(struct block_device *origin)
+{
+ struct dm_snapshot *s, *merging_snap = NULL;
+ struct origin *o;
+
+ o = __lookup_origin(origin);
+ if (!o)
+ return NULL;
+
+ list_for_each_entry(s, &o->snapshots, list) {
+ if (is_merge(s->ti)) {
+ merging_snap = s;
+ break;
+ }
+ }
+
+ return merging_snap;
+}
+
static int __validate_exception_handover(struct dm_snapshot *snap)
{
struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+ struct block_device *bdev = snap->origin->bdev;
int r = 0;
/* Does snapshot need exceptions handed over to it? */
@@ -346,8 +366,19 @@ static int __validate_exception_handover
goto out;
}
- if (snap_src)
+ if (snap_src) {
+ if (is_merge(snap->ti)) {
+ /* Do not allow more than one merging snapshot */
+ if (__find_merging_snapshot(bdev)) {
+ snap->ti->error = "A snapshot is already "
+ "merging.";
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
r = 1;
+ }
out:
return r;
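__find_merging_snapshot() is the classic find-first scan, and it only
behaves when the result starts out NULL (hence the initializer in the
hunk above); a self-contained rendition of the pattern:

#include <stdio.h>

struct snapshot { const char *name; int merging; struct snapshot *next; };

/* Find-first: merging_snap must start out NULL so a list with no match
 * returns NULL rather than an uninitialized pointer. */
static struct snapshot *find_merging(struct snapshot *head)
{
    struct snapshot *merging_snap = NULL;

    for (struct snapshot *s = head; s; s = s->next) {
        if (s->merging) {
            merging_snap = s;
            break;
        }
    }
    return merging_snap;
}

int main(void)
{
    struct snapshot b = { "snap1", 0, NULL };
    struct snapshot a = { "snap0", 0, &b };

    printf("%s\n", find_merging(&a) ? "found" : "none"); /* "none" */
    return 0;
}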