This patch set adds error correction support to dm-verity, which
makes it possible to recover from data corruption in exchange of
increased space overhead.
The feature is implemented as part of dm-verity to take advantage
of the existing hash tree to improve performance and locate known
erasures.
Sami Tolvanen (4):
dm verity: clean up duplicate hashing code
dm verity: separate function for parsing opt args
dm verity: add support for forward error correction
dm verity: ignore zero blocks
Documentation/device-mapper/verity.txt | 34 ++
drivers/md/dm-verity.c | 1004 +++++++++++++++++++++++++++-----
2 files changed, 892 insertions(+), 146 deletions(-)
--
2.6.0.rc2.230.g3dd15c0
Handle dm-verity salting in one place to simplify the code.
Signed-off-by: Sami Tolvanen <[email protected]>
---
drivers/md/dm-verity.c | 262 +++++++++++++++++++++++++++----------------------
1 file changed, 147 insertions(+), 115 deletions(-)
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index edc624b..487cb66 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -173,6 +173,84 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
return block >> (level * v->hash_per_block_bits);
}
+/*
+ * Wrapper for crypto_shash_init, which handles verity salting.
+ */
+static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc)
+{
+ int r;
+
+ desc->tfm = v->tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ r = crypto_shash_init(desc);
+
+ if (unlikely(r < 0)) {
+ DMERR("crypto_shash_init failed: %d", r);
+ return r;
+ }
+
+ if (likely(v->version >= 1)) {
+ r = crypto_shash_update(desc, v->salt, v->salt_size);
+
+ if (unlikely(r < 0)) {
+ DMERR("crypto_shash_update failed: %d", r);
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc,
+ const u8 *data, size_t len)
+{
+ int r = crypto_shash_update(desc, data, len);
+
+ if (unlikely(r < 0))
+ DMERR("crypto_shash_update failed: %d", r);
+
+ return r;
+}
+
+static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
+ u8 *digest)
+{
+ int r;
+
+ if (unlikely(!v->version)) {
+ r = crypto_shash_update(desc, v->salt, v->salt_size);
+
+ if (r < 0) {
+ DMERR("crypto_shash_update failed: %d", r);
+ return r;
+ }
+ }
+
+ r = crypto_shash_final(desc, digest);
+
+ if (unlikely(r < 0))
+ DMERR("crypto_shash_final failed: %d", r);
+
+ return r;
+}
+
+static int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+ const u8 *data, size_t len, u8 *digest)
+{
+ int r;
+
+ r = verity_hash_init(v, desc);
+ if (unlikely(r < 0))
+ return r;
+
+ r = verity_hash_update(v, desc, data, len);
+ if (unlikely(r < 0))
+ return r;
+
+ return verity_hash_final(v, desc, digest);
+}
+
static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
sector_t *hash_block, unsigned *offset)
{
@@ -253,10 +331,10 @@ out:
* If "skip_unverified" is false, unverified buffer is hashed and verified
* against current value of io_want_digest(v, io).
*/
-static int verity_verify_level(struct dm_verity_io *io, sector_t block,
- int level, bool skip_unverified)
+static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
+ sector_t block, int level, bool skip_unverified,
+ u8 *want_digest)
{
- struct dm_verity *v = io->v;
struct dm_buffer *buf;
struct buffer_aux *aux;
u8 *data;
@@ -273,75 +351,72 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block,
aux = dm_bufio_get_aux_data(buf);
if (!aux->hash_verified) {
- struct shash_desc *desc;
- u8 *result;
-
if (skip_unverified) {
r = 1;
goto release_ret_r;
}
- desc = io_hash_desc(v, io);
- desc->tfm = v->tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- r = crypto_shash_init(desc);
- if (r < 0) {
- DMERR("crypto_shash_init failed: %d", r);
+ r = verity_hash(v, io_hash_desc(v, io),
+ data, 1 << v->hash_dev_block_bits,
+ io_real_digest(v, io));
+ if (unlikely(r < 0))
goto release_ret_r;
- }
-
- if (likely(v->version >= 1)) {
- r = crypto_shash_update(desc, v->salt, v->salt_size);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- goto release_ret_r;
- }
- }
- r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- goto release_ret_r;
- }
-
- if (!v->version) {
- r = crypto_shash_update(desc, v->salt, v->salt_size);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- goto release_ret_r;
- }
- }
-
- result = io_real_digest(v, io);
- r = crypto_shash_final(desc, result);
- if (r < 0) {
- DMERR("crypto_shash_final failed: %d", r);
+ if (likely(memcmp(io_real_digest(v, io), want_digest,
+ v->digest_size) == 0))
+ aux->hash_verified = 1;
+ else if (verity_handle_err(v,
+ DM_VERITY_BLOCK_TYPE_METADATA,
+ hash_block)) {
+ r = -EIO;
goto release_ret_r;
}
- if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
- if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA,
- hash_block)) {
- r = -EIO;
- goto release_ret_r;
- }
- } else
- aux->hash_verified = 1;
}
data += offset;
-
- memcpy(io_want_digest(v, io), data, v->digest_size);
-
- dm_bufio_release(buf);
- return 0;
+ memcpy(want_digest, data, v->digest_size);
+ r = 0;
release_ret_r:
dm_bufio_release(buf);
-
return r;
}
/*
+ * Find a hash for a given block, write it to digest and verify the integrity
+ * of the hash tree if necessary.
+ */
+static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+ sector_t block, u8 *digest)
+{
+ int i;
+ int r;
+
+ if (likely(v->levels)) {
+ /*
+ * First, we try to get the requested hash for
+ * the current block. If the hash block itself is
+ * verified, zero is returned. If it isn't, this
+ * function returns 1 and we fall back to whole
+ * chain verification.
+ */
+ r = verity_verify_level(v, io, block, 0, true, digest);
+ if (likely(r <= 0))
+ return r;
+ }
+
+ memcpy(digest, v->root_digest, v->digest_size);
+
+ for (i = v->levels - 1; i >= 0; i--) {
+ r = verity_verify_level(v, io, block, i, false, digest);
+ if (unlikely(r))
+ return r;
+ }
+
+ return 0;
+}
+
+/*
* Verify one "dm_verity_io" structure.
*/
static int verity_verify_io(struct dm_verity_io *io)
@@ -350,54 +425,21 @@ static int verity_verify_io(struct dm_verity_io *io)
struct bio *bio = dm_bio_from_per_bio_data(io,
v->ti->per_bio_data_size);
unsigned b;
- int i;
for (b = 0; b < io->n_blocks; b++) {
- struct shash_desc *desc;
- u8 *result;
int r;
unsigned todo;
+ struct shash_desc *desc = io_hash_desc(v, io);
- if (likely(v->levels)) {
- /*
- * First, we try to get the requested hash for
- * the current block. If the hash block itself is
- * verified, zero is returned. If it isn't, this
- * function returns 0 and we fall back to whole
- * chain verification.
- */
- int r = verity_verify_level(io, io->block + b, 0, true);
- if (likely(!r))
- goto test_block_hash;
- if (r < 0)
- return r;
- }
-
- memcpy(io_want_digest(v, io), v->root_digest, v->digest_size);
-
- for (i = v->levels - 1; i >= 0; i--) {
- int r = verity_verify_level(io, io->block + b, i, false);
- if (unlikely(r))
- return r;
- }
+ r = verity_hash_for_block(v, io, io->block + b,
+ io_want_digest(v, io));
+ if (unlikely(r < 0))
+ return r;
-test_block_hash:
- desc = io_hash_desc(v, io);
- desc->tfm = v->tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- r = crypto_shash_init(desc);
- if (r < 0) {
- DMERR("crypto_shash_init failed: %d", r);
+ r = verity_hash_init(v, desc);
+ if (unlikely(r < 0))
return r;
- }
- if (likely(v->version >= 1)) {
- r = crypto_shash_update(desc, v->salt, v->salt_size);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- return r;
- }
- }
todo = 1 << v->data_dev_block_bits;
do {
u8 *page;
@@ -408,37 +450,27 @@ test_block_hash:
len = bv.bv_len;
if (likely(len >= todo))
len = todo;
- r = crypto_shash_update(desc, page + bv.bv_offset, len);
+ r = verity_hash_update(v, desc, page + bv.bv_offset,
+ len);
kunmap_atomic(page);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
+ if (unlikely(r < 0))
return r;
- }
bio_advance_iter(bio, &io->iter, len);
todo -= len;
} while (todo);
- if (!v->version) {
- r = crypto_shash_update(desc, v->salt, v->salt_size);
- if (r < 0) {
- DMERR("crypto_shash_update failed: %d", r);
- return r;
- }
- }
-
- result = io_real_digest(v, io);
- r = crypto_shash_final(desc, result);
- if (r < 0) {
- DMERR("crypto_shash_final failed: %d", r);
+ r = verity_hash_final(v, desc, io_real_digest(v, io));
+ if (unlikely(r < 0))
return r;
- }
- if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
- if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
- io->block + b))
- return -EIO;
- }
+
+ if (likely(memcmp(io_real_digest(v, io),
+ io_want_digest(v, io), v->digest_size) == 0))
+ continue;
+ else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
+ io->block + b))
+ return -EIO;
}
return 0;
--
2.6.0.rc2.230.g3dd15c0
Move optional argument parsing into a separate function to make it
easier to add more of them without making verity_ctr even longer.
Signed-off-by: Sami Tolvanen <[email protected]>
---
drivers/md/dm-verity.c | 31 ++++++++++++++++++++++---------
1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 487cb66..da76f77 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -34,6 +34,8 @@
#define DM_VERITY_OPT_LOGGING "ignore_corruption"
#define DM_VERITY_OPT_RESTART "restart_on_corruption"
+#define DM_VERITY_OPTS_MAX 1
+
static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
@@ -725,6 +727,21 @@ static void verity_dtr(struct dm_target *ti)
kfree(v);
}
+static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
+ const char *opt_string)
+{
+ if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING)) {
+ v->mode = DM_VERITY_MODE_LOGGING;
+ return 0;
+ } else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART)) {
+ v->mode = DM_VERITY_MODE_RESTART;
+ return 0;
+ }
+
+ v->ti->error = "Invalid feature arguments";
+ return -EINVAL;
+}
+
/*
* Target parameters:
* <version> The current format is version 1.
@@ -752,7 +769,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
char dummy;
static struct dm_arg _args[] = {
- {0, 1, "Invalid number of feature args"},
+ {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"},
};
v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
@@ -912,15 +929,11 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
- if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING))
- v->mode = DM_VERITY_MODE_LOGGING;
- else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART))
- v->mode = DM_VERITY_MODE_RESTART;
- else {
- ti->error = "Invalid feature arguments";
- r = -EINVAL;
+ r = verity_parse_opt_args(&as, v, opt_string);
+ if (r < 0)
goto bad;
- }
+
+ opt_params -= r;
}
}
--
2.6.0.rc2.230.g3dd15c0
Add support for correcting corrupted blocks using Reed-Solomon.
This code uses RS(255, N) interleaved across data and hash
blocks. Each error-correcting block covers N bytes evenly
distributed across the combined total data, so that each byte is a
maximum distance away from the others. This makes it possible to
recover from several consecutive corrupted blocks with relatively
small space overhead.
In addition, using verity hashes to locate erasures nearly doubles
the effectiveness of error correction. Being able to detect
corrupted blocks also improves performance, because only corrupted
blocks need to corrected.
For a 2 GiB partition, RS(255, 253) (two parity bytes for each
253-byte block) can correct up to 16 MiB of consecutive corrupted
blocks if erasures can be located, and 8 MiB if they cannot, with
16 MiB space overhead.
Signed-off-by: Sami Tolvanen <[email protected]>
---
Documentation/device-mapper/verity.txt | 29 ++
drivers/md/dm-verity.c | 689 ++++++++++++++++++++++++++++++---
2 files changed, 673 insertions(+), 45 deletions(-)
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index e15bc1a..3628d28 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -79,6 +79,35 @@ restart_on_corruption
not compatible with ignore_corruption and requires user space support to
avoid restart loops.
+use_fec_from_device
+ Use forward error correction (FEC) to recover from corruption if hash
+ verification fails. Use encoding data from the specified device. This
+ may be the same device where data and hash blocks reside, in which case
+ fec_start must be outside data and hash areas.
+
+ If the encoding data covers additional metadata, it must be accessible
+ on the hash device after the hash blocks.
+
+ Note: block sizes for data and hash devices must match.
+
+ A command line tool for generating encoding data is available from the
+ Android Open Source Project:
+ https://android.googlesource.com/platform/system/extras/+/master/verity/fec/
+
+fec_roots
+ Number of generator roots. This equals to the number of parity bytes in
+ the encoding data. For example, in RS(M, N) encoding, the number of roots
+ is M-N.
+
+fec_blocks
+ The number of encoding data blocks on the FEC device. The block size for
+ the FEC device is <data_block_size>.
+
+fec_start
+ This is the offset, in <data_block_size> blocks, from the start of the
+ FEC device to the beginning of the encoding data.
+
+
Theory of operation
===================
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index da76f77..61dec39 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2012 Red Hat, Inc.
+ * Copyright (C) 2015 Google, Inc.
*
* Author: Mikulas Patocka <[email protected]>
*
@@ -19,6 +20,8 @@
#include <linux/module.h>
#include <linux/device-mapper.h>
#include <linux/reboot.h>
+#include <linux/rslib.h>
+#include <linux/vmalloc.h>
#include <crypto/hash.h>
#define DM_MSG_PREFIX "verity"
@@ -31,10 +34,18 @@
#define DM_VERITY_MAX_LEVELS 63
#define DM_VERITY_MAX_CORRUPTED_ERRS 100
+#define DM_VERITY_FEC_RSM 255
+
#define DM_VERITY_OPT_LOGGING "ignore_corruption"
#define DM_VERITY_OPT_RESTART "restart_on_corruption"
-#define DM_VERITY_OPTS_MAX 1
+#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device"
+#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks"
+#define DM_VERITY_OPT_FEC_START "fec_start"
+#define DM_VERITY_OPT_FEC_ROOTS "fec_roots"
+
+#define DM_VERITY_OPTS_FEC 8
+#define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC)
static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
@@ -54,8 +65,11 @@ enum verity_block_type {
struct dm_verity {
struct dm_dev *data_dev;
struct dm_dev *hash_dev;
+ struct dm_dev *fec_dev;
struct dm_target *ti;
- struct dm_bufio_client *bufio;
+ struct dm_bufio_client *data_bufio;
+ struct dm_bufio_client *hash_bufio;
+ struct dm_bufio_client *fec_bufio;
char *alg_name;
struct crypto_shash *tfm;
u8 *root_digest; /* digest of the root block */
@@ -65,11 +79,17 @@ struct dm_verity {
sector_t hash_start; /* hash start in blocks */
sector_t data_blocks; /* the number of data blocks */
sector_t hash_blocks; /* the number of hash blocks */
+ sector_t fec_start; /* FEC data start in blocks */
+ sector_t fec_blocks; /* number of blocks covered by FEC */
+ sector_t fec_rounds; /* number of FEC rounds */
+ sector_t fec_hash_blocks; /* blocks after hash_start */
unsigned char data_dev_block_bits; /* log2(data blocksize) */
unsigned char hash_dev_block_bits; /* log2(hash blocksize) */
unsigned char hash_per_block_bits; /* log2(hashes in hash block) */
unsigned char levels; /* the number of tree levels */
unsigned char version;
+ unsigned char fec_roots;/* number of parity bytes, M-N of RS(M, N) */
+ unsigned char fec_rsn; /* N of RS(M, N) */
unsigned digest_size; /* digest size for the current hash algorithm */
unsigned shash_descsize;/* the size of temporary space for crypto */
int hash_failed; /* set to 1 if hash of any block failed */
@@ -96,6 +116,11 @@ struct dm_verity_io {
struct work_struct work;
+ struct rs_control *rs;
+ int *erasures;
+ size_t fec_pos;
+ u8 *fec_buf;
+
/*
* Three variably-size fields follow this struct:
*
@@ -148,7 +173,7 @@ struct buffer_aux {
/*
* Initialize struct buffer_aux for a freshly created buffer.
*/
-static void dm_bufio_alloc_callback(struct dm_buffer *buf)
+static void dm_hash_bufio_alloc_callback(struct dm_buffer *buf)
{
struct buffer_aux *aux = dm_bufio_get_aux_data(buf);
@@ -322,6 +347,10 @@ out:
return 1;
}
+static int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+ enum verity_block_type type, sector_t block,
+ u8 *dest, struct bvec_iter *iter);
+
/*
* Verify hash of a metadata block pertaining to the specified data block
* ("block" argument) at a specified level ("level" argument).
@@ -346,7 +375,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
verity_hash_at_level(v, block, level, &hash_block, &offset);
- data = dm_bufio_read(v->bufio, hash_block, &buf);
+ data = dm_bufio_read(v->hash_bufio, hash_block, &buf);
if (IS_ERR(data))
return PTR_ERR(data);
@@ -367,6 +396,10 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
if (likely(memcmp(io_real_digest(v, io), want_digest,
v->digest_size) == 0))
aux->hash_verified = 1;
+ else if (verity_fec_decode(v, io,
+ DM_VERITY_BLOCK_TYPE_METADATA,
+ hash_block, data, NULL) == 0)
+ aux->hash_verified = 1;
else if (verity_handle_err(v,
DM_VERITY_BLOCK_TYPE_METADATA,
hash_block)) {
@@ -391,8 +424,7 @@ release_ret_r:
static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest)
{
- int i;
- int r;
+ int r, i;
if (likely(v->levels)) {
/*
@@ -419,18 +451,62 @@ static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
}
/*
+ * Calls function f for 1 << v->data_dev_block_bits bytes in io->io_vec
+ * starting from (vector, offset). Assumes io->io_vec has enough data to
+ * process.
+ */
+static int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+ struct bvec_iter *iter,
+ int (*process)(struct dm_verity *v,
+ struct dm_verity_io *io,
+ u8 *data, size_t len))
+{
+ unsigned todo = 1 << v->data_dev_block_bits;
+ struct bio *bio = dm_bio_from_per_bio_data(io,
+ v->ti->per_bio_data_size);
+
+ do {
+ int r;
+ u8 *page;
+ unsigned len;
+ struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+ page = kmap_atomic(bv.bv_page);
+ len = bv.bv_len;
+
+ if (likely(len >= todo))
+ len = todo;
+
+ r = process(v, io, page + bv.bv_offset, len);
+ kunmap_atomic(page);
+
+ if (r < 0)
+ return r;
+
+ bio_advance_iter(bio, iter, len);
+ todo -= len;
+ } while (todo);
+
+ return 0;
+}
+
+static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+{
+ return verity_hash_update(v, io_hash_desc(v, io), data, len);
+}
+
+/*
* Verify one "dm_verity_io" structure.
*/
static int verity_verify_io(struct dm_verity_io *io)
{
struct dm_verity *v = io->v;
- struct bio *bio = dm_bio_from_per_bio_data(io,
- v->ti->per_bio_data_size);
+ struct bvec_iter start;
unsigned b;
for (b = 0; b < io->n_blocks; b++) {
int r;
- unsigned todo;
struct shash_desc *desc = io_hash_desc(v, io);
r = verity_hash_for_block(v, io, io->block + b,
@@ -442,36 +518,24 @@ static int verity_verify_io(struct dm_verity_io *io)
if (unlikely(r < 0))
return r;
- todo = 1 << v->data_dev_block_bits;
- do {
- u8 *page;
- unsigned len;
- struct bio_vec bv = bio_iter_iovec(bio, io->iter);
-
- page = kmap_atomic(bv.bv_page);
- len = bv.bv_len;
- if (likely(len >= todo))
- len = todo;
- r = verity_hash_update(v, desc, page + bv.bv_offset,
- len);
- kunmap_atomic(page);
-
- if (unlikely(r < 0))
- return r;
-
- bio_advance_iter(bio, &io->iter, len);
- todo -= len;
- } while (todo);
+ start = io->iter;
+ r = verity_for_bv_block(v, io, &io->iter,
+ verity_bv_hash_update);
+ if (unlikely(r < 0))
+ return r;
r = verity_hash_final(v, desc, io_real_digest(v, io));
if (unlikely(r < 0))
return r;
if (likely(memcmp(io_real_digest(v, io),
- io_want_digest(v, io), v->digest_size) == 0))
+ io_want_digest(v, io), v->digest_size) == 0))
+ continue;
+ else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
+ io->block + b, NULL, &start) == 0)
continue;
else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
- io->block + b))
+ io->block + b))
return -EIO;
}
@@ -479,6 +543,345 @@ static int verity_verify_io(struct dm_verity_io *io)
}
/*
+ * Returns an interleaved offset for a byte in RS block.
+ */
+static inline u64 verity_fec_interleave(struct dm_verity *v, u64 offset)
+{
+ u32 mod;
+
+ mod = do_div(offset, v->fec_rsn);
+ return offset + mod * (v->fec_rounds << v->data_dev_block_bits);
+}
+
+/*
+ * Decode a block using Reed-Solomon.
+ */
+static int verity_fec_decode_rs8(struct dm_verity *v,
+ struct dm_verity_io *io, u8 *data, u8 *fec,
+ int neras)
+{
+ int i;
+ uint16_t par[v->fec_roots];
+
+ for (i = 0; i < v->fec_roots; i++)
+ par[i] = fec[i];
+
+ return decode_rs8(io->rs, data, par, v->fec_rsn, NULL, neras,
+ io->erasures, 0, NULL);
+}
+
+/*
+ * Read error-correcting codes for the requested RS block. Returns a pointer
+ * to the data block. Caller is responsible for releasing buf.
+ */
+static u8 *verity_fec_read_par(struct dm_verity *v, u64 rsb, int index,
+ unsigned *offset, struct dm_buffer **buf)
+{
+ u64 block;
+ u8 *res;
+
+ block = (index + rsb) * v->fec_roots >> v->data_dev_block_bits;
+
+ *offset = (unsigned)((block << v->data_dev_block_bits) -
+ (index + rsb) * v->fec_roots);
+
+ res = dm_bufio_read(v->fec_bufio, v->fec_start + block, buf);
+
+ if (unlikely(IS_ERR(res))) {
+ DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
+ v->data_dev->name, (unsigned long long)rsb,
+ (unsigned long long)(v->fec_start + block),
+ PTR_ERR(res));
+ *buf = NULL;
+ return NULL;
+ }
+
+ return res;
+}
+
+/*
+ * Decode 1 << v->data_dev_block_bits FEC blocks from io->fec_buf and copy the
+ * corrected 'index' block to the beginning of the buffer.
+ */
+static int verity_fec_decode_buf(struct dm_verity *v, struct dm_verity_io *io,
+ u64 rsb, int index, int neras)
+{
+ int r = -1, corrected = 0, i, res;
+ struct dm_buffer *buf;
+ unsigned offset;
+ u8 *par;
+
+ par = verity_fec_read_par(v, rsb, 0, &offset, &buf);
+ if (unlikely(!par))
+ return r;
+
+ for (i = 0; i < 1 << v->data_dev_block_bits; i++) {
+ if (offset >= 1 << v->data_dev_block_bits) {
+ dm_bufio_release(buf);
+
+ par = verity_fec_read_par(v, rsb, i, &offset, &buf);
+ if (unlikely(!par))
+ return r;
+ }
+
+ res = verity_fec_decode_rs8(v, io,
+ &io->fec_buf[i * v->fec_rsn], &par[offset],
+ neras);
+
+ if (res < 0)
+ goto out;
+
+ corrected += res;
+ offset += v->fec_roots;
+
+ /* copy corrected block to the beginning of fec_buf */
+ io->fec_buf[i] = io->fec_buf[i * v->fec_rsn + index];
+ }
+
+ r = corrected;
+
+out:
+ dm_bufio_release(buf);
+
+ if (r < 0 && neras)
+ DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
+ v->data_dev->name, (unsigned long long)rsb, r);
+ else if (r > 0)
+ DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
+ v->data_dev->name, (unsigned long long)rsb, r);
+
+ return r;
+}
+
+/*
+ * Locate data block erasures using verity hashes.
+ */
+static int verity_fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *want_digest, u8 *data)
+{
+ if (unlikely(verity_hash(v, io_hash_desc(v, io),
+ data, 1 << v->data_dev_block_bits,
+ io_real_digest(v, io))))
+ return 0;
+
+ return memcmp(io_real_digest(v, io), want_digest, v->digest_size) != 0;
+}
+
+/*
+ * Read 1 << v->data_dev_block_bits interleaved FEC blocks into io->fec_buf
+ * and check for erasure locations if neras is non-NULL.
+ */
+static int verity_fec_read_buf(struct dm_verity *v, struct dm_verity_io *io,
+ u64 rsb, u64 target, int *neras)
+{
+ int i, j, target_index = -1;
+ struct dm_buffer *buf;
+ struct dm_bufio_client *bufio;
+ u64 block, ileaved;
+ u8 *bbuf;
+ u8 want_digest[v->digest_size];
+
+ if (neras)
+ *neras = 0;
+
+ for (i = 0; i < v->fec_rsn; i++) {
+ ileaved = verity_fec_interleave(v, rsb * v->fec_rsn + i);
+
+ if (ileaved == target)
+ target_index = i;
+
+ block = ileaved >> v->data_dev_block_bits;
+ bufio = v->data_bufio;
+
+ if (block >= v->data_blocks) {
+ block -= v->data_blocks;
+
+ if (unlikely(block >= v->fec_hash_blocks))
+ continue;
+
+ block += v->hash_start;
+ bufio = v->hash_bufio;
+ }
+
+ bbuf = dm_bufio_read(bufio, block, &buf);
+
+ if (unlikely(IS_ERR(bbuf))) {
+ DMERR("%s: FEC %llu: read failed (block %llu): %ld",
+ v->data_dev->name, (unsigned long long)rsb,
+ (unsigned long long)block, PTR_ERR(bbuf));
+ return -1;
+ }
+
+ if (block < v->data_blocks &&
+ verity_hash_for_block(v, io, block, want_digest) == 0) {
+ if (neras && *neras <= v->fec_roots &&
+ verity_fec_is_erasure(v, io, want_digest, bbuf))
+ io->erasures[(*neras)++] = i;
+ }
+
+ for (j = 0; j < 1 << v->data_dev_block_bits; j++)
+ io->fec_buf[j * v->fec_rsn + i] = bbuf[j];
+
+ dm_bufio_release(buf);
+ }
+
+ return target_index;
+}
+
+/*
+ * Initialize Reed-Solomon and FEC buffers, and allocate them if needed.
+ */
+static int verity_fec_alloc_buffers(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ size_t bufsize;
+
+ if (!io->rs) {
+ io->rs = init_rs(8, 0x11d, 0, 1, v->fec_roots);
+
+ if (unlikely(!io->rs)) {
+ DMERR("init_rs failed");
+ return -ENOMEM;
+ }
+ }
+
+ bufsize = v->fec_rsn << v->data_dev_block_bits;
+
+ if (!io->fec_buf) {
+ io->fec_buf = vzalloc(bufsize);
+
+ if (unlikely(!io->fec_buf)) {
+ DMERR("vzalloc failed (%zu bytes)", bufsize);
+ return -ENOMEM;
+ }
+ } else
+ memset(io->fec_buf, 0, bufsize);
+
+ bufsize = v->fec_rsn * sizeof(int);
+
+ if (!io->erasures) {
+ io->erasures = kzalloc(bufsize, GFP_KERNEL);
+
+ if (unlikely(!io->erasures)) {
+ DMERR("kmalloc failed (%zu bytes)", bufsize);
+ return -ENOMEM;
+ }
+ } else
+ memset(io->erasures, 0, bufsize);
+
+ return 0;
+}
+
+/*
+ * Decode an interleaved RS block. If use_erasures is non-zero, uses hashes to
+ * locate erasures. If returns zero, the corrected block is in the beginning of
+ * io->fec_buf.
+ */
+static int verity_fec_decode_rsb(struct dm_verity *v,
+ struct dm_verity_io *io, u64 rsb,
+ u64 offset, int use_erasures)
+{
+ int r, neras = 0;
+
+ r = verity_fec_alloc_buffers(v, io);
+ if (unlikely(r < 0))
+ return -1;
+
+ r = verity_fec_read_buf(v, io, rsb, offset,
+ use_erasures ? &neras : NULL);
+ if (unlikely(r < 0))
+ return r;
+
+ r = verity_fec_decode_buf(v, io, rsb, r, neras);
+ if (r < 0)
+ return r;
+
+ r = verity_hash(v, io_hash_desc(v, io), io->fec_buf,
+ 1 << v->data_dev_block_bits, io_real_digest(v, io));
+ if (unlikely(r < 0))
+ return r;
+
+ if (memcmp(io_real_digest(v, io), io_want_digest(v, io),
+ v->digest_size)) {
+ DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
+ v->data_dev->name, (unsigned long long)rsb, neras);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int verity_fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+{
+ BUG_ON(io->fec_pos + len > 1 << v->data_dev_block_bits);
+ memcpy(data, &io->fec_buf[io->fec_pos], len);
+ io->fec_pos += len;
+ return 0;
+}
+
+/*
+ * Correct errors in a block. Copies corrected block to dest if non-NULL,
+ * otherwise to io->bio_vec starting from provided vector and offset.
+ */
+static int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+ enum verity_block_type type, sector_t block,
+ u8 *dest, struct bvec_iter *iter)
+{
+ int r = -1;
+ u64 offset, res, rsb;
+
+ if (!v->fec_bufio)
+ return -1;
+
+ if (type == DM_VERITY_BLOCK_TYPE_METADATA)
+ block += v->data_blocks;
+
+ /*
+ * For RS(M, N), the continuous FEC data is divided into blocks of N
+ * bytes. Since block size may not be divisible by N, the last block
+ * is zero padded when decoding.
+ *
+ * Each byte of the block is covered by a different RS(255, N) code,
+ * and each code is interleaved over N blocks to make it less likely
+ * that bursty corruption will leave us in unrecoverable state.
+ */
+
+ offset = block << v->data_dev_block_bits;
+
+ res = offset;
+ do_div(res, v->fec_rounds << v->data_dev_block_bits);
+
+ /*
+ * The base RS block we can feed to the interleaver to find out all
+ * blocks required for decoding.
+ */
+ rsb = offset - res * (v->fec_rounds << v->data_dev_block_bits);
+
+ /*
+ * Locating erasures is slow, so attempt to recover the block without
+ * them first. Do a second attempt with erasures if the corruption is
+ * bad enough.
+ */
+ r = verity_fec_decode_rsb(v, io, rsb, offset, 0);
+ if (r < 0)
+ r = verity_fec_decode_rsb(v, io, rsb, offset, 1);
+
+ if (r < 0)
+ return r;
+
+ if (dest)
+ memcpy(dest, io->fec_buf, 1 << v->hash_dev_block_bits);
+ else if (iter) {
+ io->fec_pos = 0;
+ r = verity_for_bv_block(v, io, iter, verity_fec_bv_copy);
+ }
+
+ return r;
+}
+
+
+/*
* End one "io" structure with a given error.
*/
static void verity_finish_io(struct dm_verity_io *io, int error)
@@ -490,6 +893,14 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
bio->bi_private = io->orig_bi_private;
bio->bi_error = error;
+ if (io->rs)
+ free_rs(io->rs);
+
+ if (io->fec_buf)
+ vfree(io->fec_buf);
+
+ kfree(io->erasures);
+
bio_endio(bio);
}
@@ -546,7 +957,7 @@ static void verity_prefetch_io(struct work_struct *work)
hash_block_end = v->hash_blocks - 1;
}
no_prefetch_cluster:
- dm_bufio_prefetch(v->bufio, hash_block_start,
+ dm_bufio_prefetch(v->hash_bufio, hash_block_start,
hash_block_end - hash_block_start + 1);
}
@@ -608,6 +1019,10 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
bio->bi_private = io;
io->iter = bio->bi_iter;
+ io->rs = NULL;
+ io->erasures = NULL;
+ io->fec_buf = NULL;
+
verity_submit_prefetch(v, io);
generic_make_request(bio);
@@ -622,6 +1037,7 @@ static void verity_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
struct dm_verity *v = ti->private;
+ unsigned args = 0;
unsigned sz = 0;
unsigned x;
@@ -648,8 +1064,15 @@ static void verity_status(struct dm_target *ti, status_type_t type,
else
for (x = 0; x < v->salt_size; x++)
DMEMIT("%02x", v->salt[x]);
+ if (v->mode != DM_VERITY_MODE_EIO)
+ args++;
+ if (v->fec_dev)
+ args += DM_VERITY_OPTS_FEC;
+ if (!args)
+ return;
+ DMEMIT(" %u", args);
if (v->mode != DM_VERITY_MODE_EIO) {
- DMEMIT(" 1 ");
+ DMEMIT(" ");
switch (v->mode) {
case DM_VERITY_MODE_LOGGING:
DMEMIT(DM_VERITY_OPT_LOGGING);
@@ -661,6 +1084,15 @@ static void verity_status(struct dm_target *ti, status_type_t type,
BUG();
}
}
+ if (v->fec_dev)
+ DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s "
+ DM_VERITY_OPT_FEC_BLOCKS " %llu "
+ DM_VERITY_OPT_FEC_START " %llu "
+ DM_VERITY_OPT_FEC_ROOTS " %d",
+ v->fec_dev->name,
+ (unsigned long long)v->fec_blocks,
+ (unsigned long long)v->fec_start,
+ v->fec_roots);
break;
}
}
@@ -707,8 +1139,12 @@ static void verity_dtr(struct dm_target *ti)
if (v->verify_wq)
destroy_workqueue(v->verify_wq);
- if (v->bufio)
- dm_bufio_client_destroy(v->bufio);
+ if (v->data_bufio)
+ dm_bufio_client_destroy(v->data_bufio);
+ if (v->hash_bufio)
+ dm_bufio_client_destroy(v->hash_bufio);
+ if (v->fec_bufio)
+ dm_bufio_client_destroy(v->fec_bufio);
kfree(v->salt);
kfree(v->root_digest);
@@ -718,11 +1154,12 @@ static void verity_dtr(struct dm_target *ti)
kfree(v->alg_name);
- if (v->hash_dev)
- dm_put_device(ti, v->hash_dev);
-
if (v->data_dev)
dm_put_device(ti, v->data_dev);
+ if (v->hash_dev)
+ dm_put_device(ti, v->hash_dev);
+ if (v->fec_dev)
+ dm_put_device(ti, v->fec_dev);
kfree(v);
}
@@ -730,6 +1167,11 @@ static void verity_dtr(struct dm_target *ti)
static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
const char *opt_string)
{
+ int r;
+ unsigned long long num_ll;
+ unsigned char num_c;
+ char dummy;
+
if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING)) {
v->mode = DM_VERITY_MODE_LOGGING;
return 0;
@@ -738,6 +1180,53 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
return 0;
}
+ /* Remaining arguments require a value */
+ if (!as->argc)
+ goto bad;
+
+ if (!strcasecmp(opt_string, DM_VERITY_OPT_FEC_DEV)) {
+ r = dm_get_device(v->ti, dm_shift_arg(as), FMODE_READ,
+ &v->fec_dev);
+ if (r) {
+ v->ti->error = "FEC device lookup failed";
+ return r;
+ }
+
+ return 1;
+ } else if (!strcasecmp(opt_string, DM_VERITY_OPT_FEC_BLOCKS)) {
+ if (sscanf(dm_shift_arg(as), "%llu%c", &num_ll, &dummy) != 1 ||
+ (sector_t)(num_ll <<
+ (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+ v->ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+ return -EINVAL;
+ }
+
+ v->fec_blocks = num_ll;
+ return 1;
+ } else if (!strcasecmp(opt_string, DM_VERITY_OPT_FEC_START)) {
+ if (sscanf(dm_shift_arg(as), "%llu%c", &num_ll, &dummy) != 1 ||
+ (sector_t)(num_ll <<
+ (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+ v->ti->error = "Invalid " DM_VERITY_OPT_FEC_START;
+ return -EINVAL;
+ }
+
+ v->fec_start = num_ll;
+ return 1;
+ } else if (!strcasecmp(opt_string, DM_VERITY_OPT_FEC_ROOTS)) {
+ if (sscanf(dm_shift_arg(as), "%hhu%c", &num_c, &dummy) != 1 ||
+ !num_c || num_c >= DM_VERITY_FEC_RSM) {
+ v->ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS;
+ return -EINVAL;
+ }
+
+ v->fec_roots = num_c;
+ return 1;
+ }
+
+bad:
v->ti->error = "Invalid feature arguments";
return -EINVAL;
}
@@ -968,17 +1457,17 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
}
v->hash_blocks = hash_position;
- v->bufio = dm_bufio_client_create(v->hash_dev->bdev,
+ v->hash_bufio = dm_bufio_client_create(v->hash_dev->bdev,
1 << v->hash_dev_block_bits, 1, sizeof(struct buffer_aux),
- dm_bufio_alloc_callback, NULL);
- if (IS_ERR(v->bufio)) {
- ti->error = "Cannot initialize dm-bufio";
- r = PTR_ERR(v->bufio);
- v->bufio = NULL;
+ dm_hash_bufio_alloc_callback, NULL);
+ if (IS_ERR(v->hash_bufio)) {
+ ti->error = "Cannot initialize dm-bufio for hash device";
+ r = PTR_ERR(v->hash_bufio);
+ v->hash_bufio = NULL;
goto bad;
}
- if (dm_bufio_get_device_size(v->bufio) < v->hash_blocks) {
+ if (dm_bufio_get_device_size(v->hash_bufio) < v->hash_blocks) {
ti->error = "Hash device is too small";
r = -E2BIG;
goto bad;
@@ -994,6 +1483,115 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
+ if (v->fec_dev) {
+ /*
+ * FEC is computed over data blocks, hash blocks, and possible
+ * metadata. In other words, FEC covers total of fec_blocks
+ * blocks consisting of the following:
+ *
+ * data blocks | hash blocks | metadata (optional)
+ *
+ * We allow metadata after hash blocks to support a use case
+ * where all data is stored on the same device and FEC covers
+ * the entire area.
+ *
+ * If metadata is included, we require it to be available on the
+ * hash device after the hash blocks.
+ */
+
+ u64 hash_blocks = v->hash_blocks - v->hash_start;
+
+ /*
+ * Require matching block sizes for data and hash devices for
+ * simplicity.
+ */
+ if (v->data_dev_block_bits != v->hash_dev_block_bits) {
+ ti->error = "Block sizes must match to use FEC";
+ r = -EINVAL;
+ goto bad;
+ }
+
+ if (!v->fec_roots) {
+ ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS;
+ r = -EINVAL;
+ goto bad;
+ }
+
+ v->fec_rsn = DM_VERITY_FEC_RSM - v->fec_roots;
+
+ if (!v->fec_blocks) {
+ ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS;
+ r = -EINVAL;
+ goto bad;
+ }
+
+ v->fec_rounds = v->fec_blocks;
+
+ if (do_div(v->fec_rounds, v->fec_rsn))
+ v->fec_rounds++;
+
+ /*
+ * Due to optional metadata, fec_blocks can be larger than
+ * data_blocks and hash_blocks combined.
+ */
+ if (v->fec_blocks < v->data_blocks + hash_blocks ||
+ !v->fec_rounds) {
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+ r = -EINVAL;
+ goto bad;
+ }
+
+ /*
+ * Metadata is accessed through the hash device, so we require
+ * it to be large enough.
+ */
+ v->fec_hash_blocks = v->fec_blocks - v->data_blocks;
+
+ if (dm_bufio_get_device_size(v->hash_bufio) <
+ v->fec_hash_blocks) {
+ ti->error = "Hash device is too small for "
+ DM_VERITY_OPT_FEC_BLOCKS;
+ r = -E2BIG;
+ goto bad;
+ }
+
+ v->fec_bufio = dm_bufio_client_create(v->fec_dev->bdev,
+ 1 << v->data_dev_block_bits,
+ 1, 0, NULL, NULL);
+
+ if (IS_ERR(v->fec_bufio)) {
+ ti->error = "Cannot initialize dm-bufio";
+ r = PTR_ERR(v->fec_bufio);
+ v->fec_bufio = NULL;
+ goto bad;
+ }
+
+ if (dm_bufio_get_device_size(v->fec_bufio) <
+ (v->fec_start + v->fec_rounds * v->fec_roots)
+ >> v->data_dev_block_bits) {
+ ti->error = "FEC device is too small";
+ r = -E2BIG;
+ goto bad;
+ }
+
+ v->data_bufio = dm_bufio_client_create(v->data_dev->bdev,
+ 1 << v->data_dev_block_bits,
+ 1, 0, NULL, NULL);
+
+ if (IS_ERR(v->data_bufio)) {
+ ti->error = "Cannot initialize dm-bufio";
+ r = PTR_ERR(v->data_bufio);
+ v->data_bufio = NULL;
+ goto bad;
+ }
+
+ if (dm_bufio_get_device_size(v->data_bufio) < v->data_blocks) {
+ ti->error = "Data device is too small";
+ r = -E2BIG;
+ goto bad;
+ }
+ }
+
return 0;
bad:
@@ -1037,5 +1635,6 @@ module_exit(dm_verity_exit);
MODULE_AUTHOR("Mikulas Patocka <[email protected]>");
MODULE_AUTHOR("Mandeep Baines <[email protected]>");
MODULE_AUTHOR("Will Drewry <[email protected]>");
+MODULE_AUTHOR("Sami Tolvanen <[email protected]>");
MODULE_DESCRIPTION(DM_NAME " target for transparent disk integrity checking");
MODULE_LICENSE("GPL");
--
2.6.0.rc2.230.g3dd15c0
Add ignore_zero_blocks option, which returns zeros for blocks matching a
zero hash without validating the content.
Signed-off-by: Sami Tolvanen <[email protected]>
---
Documentation/device-mapper/verity.txt | 5 ++
drivers/md/dm-verity.c | 88 ++++++++++++++++++++++++++++++----
2 files changed, 83 insertions(+), 10 deletions(-)
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index 3628d28..1b103b0 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -79,6 +79,11 @@ restart_on_corruption
not compatible with ignore_corruption and requires user space support to
avoid restart loops.
+ignore_zero_blocks
+ Do not verify blocks that are expected to contain zeros and always return
+ zeros instead. This may be useful if the partition contains unused blocks
+ that are not guaranteed to contain zeros.
+
use_fec_from_device
Use forward error correction (FEC) to recover from corruption if hash
verification fails. Use encoding data from the specified device. This
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 61dec39..485d59e 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -38,6 +38,7 @@
#define DM_VERITY_OPT_LOGGING "ignore_corruption"
#define DM_VERITY_OPT_RESTART "restart_on_corruption"
+#define DM_VERITY_OPT_IGN_ZEROS "ignore_zero_blocks"
#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device"
#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks"
@@ -45,7 +46,7 @@
#define DM_VERITY_OPT_FEC_ROOTS "fec_roots"
#define DM_VERITY_OPTS_FEC 8
-#define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC)
+#define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC)
static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
@@ -74,6 +75,7 @@ struct dm_verity {
struct crypto_shash *tfm;
u8 *root_digest; /* digest of the root block */
u8 *salt; /* salt: its size is salt_size */
+ u8 *zero_digest; /* digest for a zero block */
unsigned salt_size;
sector_t data_start; /* data offset in 512-byte sectors */
sector_t hash_start; /* hash start in blocks */
@@ -422,9 +424,9 @@ release_ret_r:
* of the hash tree if necessary.
*/
static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
- sector_t block, u8 *digest)
+ sector_t block, u8 *digest, bool *is_zero)
{
- int r, i;
+ int r = 0, i;
if (likely(v->levels)) {
/*
@@ -436,7 +438,7 @@ static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
*/
r = verity_verify_level(v, io, block, 0, true, digest);
if (likely(r <= 0))
- return r;
+ goto out;
}
memcpy(digest, v->root_digest, v->digest_size);
@@ -444,10 +446,16 @@ static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
for (i = v->levels - 1; i >= 0; i--) {
r = verity_verify_level(v, io, block, i, false, digest);
if (unlikely(r))
- return r;
+ goto out;
}
- return 0;
+out:
+ if (!r && v->zero_digest)
+ *is_zero = !memcmp(v->zero_digest, digest, v->digest_size);
+ else
+ *is_zero = false;
+
+ return r;
}
/*
@@ -496,11 +504,19 @@ static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
return verity_hash_update(v, io_hash_desc(v, io), data, len);
}
+static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+{
+ memset(data, 0, len);
+ return 0;
+}
+
/*
* Verify one "dm_verity_io" structure.
*/
static int verity_verify_io(struct dm_verity_io *io)
{
+ bool is_zero;
struct dm_verity *v = io->v;
struct bvec_iter start;
unsigned b;
@@ -510,10 +526,23 @@ static int verity_verify_io(struct dm_verity_io *io)
struct shash_desc *desc = io_hash_desc(v, io);
r = verity_hash_for_block(v, io, io->block + b,
- io_want_digest(v, io));
+ io_want_digest(v, io), &is_zero);
if (unlikely(r < 0))
return r;
+ if (is_zero) {
+ /*
+ * If we expect a zero block, don't validate, just
+ * return zeros.
+ */
+ r = verity_for_bv_block(v, io, &io->iter,
+ verity_bv_zero);
+ if (unlikely(r < 0))
+ return r;
+
+ continue;
+ }
+
r = verity_hash_init(v, desc);
if (unlikely(r < 0))
return r;
@@ -674,6 +703,7 @@ static int verity_fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
static int verity_fec_read_buf(struct dm_verity *v, struct dm_verity_io *io,
u64 rsb, u64 target, int *neras)
{
+ bool is_zero;
int i, j, target_index = -1;
struct dm_buffer *buf;
struct dm_bufio_client *bufio;
@@ -713,9 +743,13 @@ static int verity_fec_read_buf(struct dm_verity *v, struct dm_verity_io *io,
}
if (block < v->data_blocks &&
- verity_hash_for_block(v, io, block, want_digest) == 0) {
- if (neras && *neras <= v->fec_roots &&
- verity_fec_is_erasure(v, io, want_digest, bbuf))
+ verity_hash_for_block(v, io, block, want_digest,
+ &is_zero) == 0) {
+ if (is_zero)
+ memset(bbuf, 0, 1 << v->data_dev_block_bits);
+ else if (neras && *neras <= v->fec_roots &&
+ verity_fec_is_erasure(v, io, want_digest,
+ bbuf))
io->erasures[(*neras)++] = i;
}
@@ -1066,6 +1100,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
DMEMIT("%02x", v->salt[x]);
if (v->mode != DM_VERITY_MODE_EIO)
args++;
+ if (v->zero_digest)
+ args++;
if (v->fec_dev)
args += DM_VERITY_OPTS_FEC;
if (!args)
@@ -1084,6 +1120,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
BUG();
}
}
+ if (v->zero_digest)
+ DMEMIT(" " DM_VERITY_OPT_IGN_ZEROS);
if (v->fec_dev)
DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s "
DM_VERITY_OPT_FEC_BLOCKS " %llu "
@@ -1148,6 +1186,7 @@ static void verity_dtr(struct dm_target *ti)
kfree(v->salt);
kfree(v->root_digest);
+ kfree(v->zero_digest);
if (v->tfm)
crypto_free_shash(v->tfm);
@@ -1164,6 +1203,29 @@ static void verity_dtr(struct dm_target *ti)
kfree(v);
}
+static int verity_alloc_zero_digest(struct dm_verity *v)
+{
+ int r;
+ u8 desc[v->shash_descsize];
+ u8 *zero_data;
+
+ v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
+
+ if (!v->zero_digest)
+ return -ENOMEM;
+
+ zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
+
+ if (!zero_data)
+ return -ENOMEM; /* verity_dtr will free zero_digest */
+
+ r = verity_hash(v, (struct shash_desc *)desc, zero_data,
+ 1 << v->data_dev_block_bits, v->zero_digest);
+
+ kfree(zero_data);
+ return r;
+}
+
static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
const char *opt_string)
{
@@ -1178,6 +1240,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
} else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART)) {
v->mode = DM_VERITY_MODE_RESTART;
return 0;
+ } else if (!strcasecmp(opt_string, DM_VERITY_OPT_IGN_ZEROS)) {
+ r = verity_alloc_zero_digest(v);
+ if (r)
+ v->ti->error = "Cannot allocate zero digest";
+
+ return r;
}
/* Remaining arguments require a value */
--
2.6.0.rc2.230.g3dd15c0
Hi Sami,
[auto build test ERROR on: dm/for-next]
[also build test ERROR on: v4.3 next-20151104]
url: https://github.com/0day-ci/linux/commits/Sami-Tolvanen/dm-verity-clean-up-duplicate-hashing-code/20151105-124458
base: https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git for-next
config: i386-randconfig-a0-201544 (attached as .config)
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
All errors (new ones prefixed by >>):
drivers/md/dm-verity.c: In function 'verity_fec_decode_rs8':
>> drivers/md/dm-verity.c:569:9: error: implicit declaration of function 'decode_rs8' [-Werror=implicit-function-declaration]
return decode_rs8(io->rs, data, par, v->fec_rsn, NULL, neras,
^
cc1: some warnings being treated as errors
vim +/decode_rs8 +569 drivers/md/dm-verity.c
563 int i;
564 uint16_t par[v->fec_roots];
565
566 for (i = 0; i < v->fec_roots; i++)
567 par[i] = fec[i];
568
> 569 return decode_rs8(io->rs, data, par, v->fec_rsn, NULL, neras,
570 io->erasures, 0, NULL);
571 }
572
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
On 11/05/2015 03:02 AM, Sami Tolvanen wrote:
> This patch set adds error correction support to dm-verity, which
> makes it possible to recover from data corruption in exchange of
> increased space overhead.
>
> The feature is implemented as part of dm-verity to take advantage
> of the existing hash tree to improve performance and locate known
> erasures.
Hi,
could you please elaborate why is all this needed? To extend support
of some faulty flash chips?
Do you have some statistics that there are really such correctable errors
in real devices?
Anyway, I really do not understand layer separation here. Either we have
cryptographically strong data integrity checking or we have
error-correction. Are we sure this combination does not create some unintended
gap in integrity checking? Why the integrity check should even try to do some
error correction if there is an intentional integrity attack?
IMO if you need an error correction, this should be placed as a separate
layer below the crypto integrity check, the same as RAID operates.
The second question - why are you writing another separate tool
for maintenance for dm-verity when there is veritysetup?
Milan
On Thu, Nov 05, 2015 at 08:34:04AM +0100, Milan Broz wrote:
> could you please elaborate why is all this needed? To extend support
> of some faulty flash chips?
This makes dm-verity more robust against corruption caused by either
hardware or software bugs, both of which we have seen in the past on
actual devices.
Note that unlike the error correction sometimes included in flash
storage devices, this doesn't merely protect against random bit flips,
it makes it possible to recover from several megabytes of corrupted
or lost data.
> Do you have some statistics that there are really such correctable errors
> in real devices?
Sorry, I don't have statistics to share at the moment.
> Anyway, I really do not understand layer separation here.
I should have elaborated more on this. Implementing this without
integrity checking would not be feasible for a few reasons:
1. Being able to detect which blocks are corrupted allows us to
avoid correcting valid blocks. Correcting errors is slow and
this is the only way to keep performance acceptable.
2. Due to a property of erasure codes, we can correct twice as
many errors if we know where the errors are. Using the hash
tree to detect corrupted blocks lets us locate erasures.
3. Error correction algorithms may not produce valid output and
without integrity checking, there's no reliable way to detect
when we actually succeeded in correcting a block.
> Are we sure this combination does not create some unintended
> gap in integrity checking?
Yes, I'm sure. Corrupted blocks are integrity checked again after they
are corrected to make sure only valid data is allowed to pass.
> Why the integrity check should even try to do some
> error correction if there is an intentional integrity attack?
Most corruption is not malicious and being able to recover from it
makes the system more reliable. This doesn't make it any easier for an
attacker to break dm-verity. That would still require finding a hash
collision (or being able to modify the hash tree).
> The second question - why are you writing another separate tool
> for maintenance for dm-verity when there is veritysetup?
Our tool for generating error correction metadata is independent from
dm-verity. This data is also used by other software to correct errors
during software updates, for example. If there's interest, I can help
in adding this functionality to veritysetup.
Sami
Hi Sami,
[auto build test WARNING on: dm/for-next]
[also build test WARNING on: v4.3 next-20151105]
url: https://github.com/0day-ci/linux/commits/Sami-Tolvanen/dm-verity-clean-up-duplicate-hashing-code/20151105-124458
base: https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git for-next
config: s390-allyesconfig (attached as .config)
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=s390
All warnings (new ones prefixed by >>):
drivers/md/dm-verity.c: In function 'verity_fec_decode_rsb':
>> drivers/md/dm-verity.c:812:1: warning: 'verity_fec_decode_rsb' uses dynamic stack allocation
}
^
vim +/verity_fec_decode_rsb +812 drivers/md/dm-verity.c
796 if (r < 0)
797 return r;
798
799 r = verity_hash(v, io_hash_desc(v, io), io->fec_buf,
800 1 << v->data_dev_block_bits, io_real_digest(v, io));
801 if (unlikely(r < 0))
802 return r;
803
804 if (memcmp(io_real_digest(v, io), io_want_digest(v, io),
805 v->digest_size)) {
806 DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
807 v->data_dev->name, (unsigned long long)rsb, neras);
808 return -1;
809 }
810
811 return 0;
> 812 }
813
814 static int verity_fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io,
815 u8 *data, size_t len)
816 {
817 BUG_ON(io->fec_pos + len > 1 << v->data_dev_block_bits);
818 memcpy(data, &io->fec_buf[io->fec_pos], len);
819 io->fec_pos += len;
820 return 0;
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Sami,
[auto build test WARNING on: dm/for-next]
[also build test WARNING on: v4.3 next-20151105]
url: https://github.com/0day-ci/linux/commits/Sami-Tolvanen/dm-verity-clean-up-duplicate-hashing-code/20151105-124458
base: https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git for-next
config: s390-allyesconfig (attached as .config)
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=s390
All warnings (new ones prefixed by >>):
drivers/md/dm-verity.c: In function 'verity_alloc_zero_digest':
>> drivers/md/dm-verity.c:1226:1: warning: 'verity_alloc_zero_digest' uses dynamic stack allocation
}
^
drivers/md/dm-verity.c: In function 'verity_fec_decode_rsb':
drivers/md/dm-verity.c:846:1: warning: 'verity_fec_decode_rsb' uses dynamic stack allocation
}
^
vim +/verity_alloc_zero_digest +1226 drivers/md/dm-verity.c
1210
1211 v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
1212
1213 if (!v->zero_digest)
1214 return -ENOMEM;
1215
1216 zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
1217
1218 if (!zero_data)
1219 return -ENOMEM; /* verity_dtr will free zero_digest */
1220
1221 r = verity_hash(v, (struct shash_desc *)desc, zero_data,
1222 1 << v->data_dev_block_bits, v->zero_digest);
1223
1224 kfree(zero_data);
1225 return r;
> 1226 }
1227
1228 static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
1229 const char *opt_string)
1230 {
1231 int r;
1232 unsigned long long num_ll;
1233 unsigned char num_c;
1234 char dummy;
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
On Thu, 5 Nov 2015, Milan Broz wrote:
> On 11/05/2015 03:02 AM, Sami Tolvanen wrote:
> > This patch set adds error correction support to dm-verity, which
> > makes it possible to recover from data corruption in exchange of
> > increased space overhead.
> >
> > The feature is implemented as part of dm-verity to take advantage
> > of the existing hash tree to improve performance and locate known
> > erasures.
>
> Hi,
>
> could you please elaborate why is all this needed? To extend support
> of some faulty flash chips?
>
> Do you have some statistics that there are really such correctable errors
> in real devices?
I'm also wondering what is this patch useful for. Disks and flash
controllers have their own error detection and correction, so the
controller will much more likely return an I/O error rather than corrupted
data. And the patch does absolutely nothing to recover from an I/O error,
it only attempts to correct corrupted reads.
Another point - if the read-only system partition is experiencing some
errors, than the read-write partition will probably have errors too
(because both partitions are on the same flash chip) and the Chromebook or
smartphone will be unusable anyway because of errors on the writeable
partition. Do you have some real case where such error corrections
increase longevity of some device?
> Anyway, I really do not understand layer separation here. Either we have
> cryptographically strong data integrity checking or we have
> error-correction. Are we sure this combination does not create some unintended
> gap in integrity checking? Why the integrity check should even try to do some
> error correction if there is an intentional integrity attack?
>
> IMO if you need an error correction, this should be placed as a separate
> layer below the crypto integrity check, the same as RAID operates.
If error correction was placed below dm-verity, it would degrade
performance because it would have to verify every sector, even if
dm-verity said that the sector is valid.
But you can take raid5 in read-only mode, put it on several partitions
protected with dm-verity and you get decent error correction (unlike this
patch, it would also correct I/O errors returned by the flash controller).
I suggest doing this.
> The second question - why are you writing another separate tool
> for maintenance for dm-verity when there is veritysetup?
>
> Milan
Mikulas
On Fri, Nov 06, 2015 at 12:23:29PM -0500, Mikulas Patocka wrote:
> I'm also wondering what is this patch useful for. Disks and flash
> controllers have their own error detection and correction
I think I addressed this earlier. Some storage devices are able to
correct bit flips, but don't have enough redundancy to correct larger
errors. Using this patch set we can correct N MiB of consecutive
corruption anywhere on the partition with the same amount of storage
overhead.
> Another point - if the read-only system partition is experiencing some
> errors, than the read-write partition will probably have errors too
On mobile devices, errors in read-only partitions often lead to
bricked devices while errors in the read-write parts might only lead
to lost cat photos. There are situations where people would prefer to
have a working phone even if it fails to store some of their data.
> Do you have some real case where such error corrections
> increase longevity of some device?
Yes, there have been several cases where read-only partition errors
have rendered a device unusable. The sheer volume of mobile devices
means that even if a tiny fraction of them suffer from such a problem,
it's going to affect a large number of people.
> But you can take raid5 in read-only mode, put it on several partitions
> protected with dm-verity and you get decent error correction
I agree. Unfortunately, we don't currently have the luxury of using
raid on mobile devices.
Sami
Dne 6.11.2015 v 20:06 Sami Tolvanen napsal(a):
> On Fri, Nov 06, 2015 at 12:23:29PM -0500, Mikulas Patocka wrote:
>> I'm also wondering what is this patch useful for. Disks and flash
>> controllers have their own error detection and correction
>
> I think I addressed this earlier. Some storage devices are able to
> correct bit flips, but don't have enough redundancy to correct larger
> errors. Using this patch set we can correct N MiB of consecutive
> corruption anywhere on the partition with the same amount of storage
> overhead.
>
>> Another point - if the read-only system partition is experiencing some
>> errors, than the read-write partition will probably have errors too
>
> On mobile devices, errors in read-only partitions often lead to
> bricked devices while errors in the read-write parts might only lead
> to lost cat photos. There are situations where people would prefer to
> have a working phone even if it fails to store some of their data.
>
>> Do you have some real case where such error corrections
>> increase longevity of some device?
>
> Yes, there have been several cases where read-only partition errors
> have rendered a device unusable. The sheer volume of mobile devices
> means that even if a tiny fraction of them suffer from such a problem,
> it's going to affect a large number of people.
>
>> But you can take raid5 in read-only mode, put it on several partitions
>> protected with dm-verity and you get decent error correction
>
> I agree. Unfortunately, we don't currently have the luxury of using
> raid on mobile devices.
AFAIK - you just build as much partition as need to have some 'space'
dedicated for redundancy - and rest of 'partitions' you join as 'writable'
i.e. you have 1G of space - you want to give 250MB as 'redundancy' -
so create 4 partition....
Since phones starts to have 64GB of storage space - it looks like a way to go.....
As no one bothers to upgrade 'old' phone - why to focus there??
And BTW - already seen couple bricked Nexus7....
(And no marhmallow in plan....)
(And interestingly all killed I've seen were on Lolipop - none with Kitkat)
Zdenek
On Fri, Nov 06, 2015 at 08:20:15PM +0100, Zdenek Kabelac wrote:
> i.e. you have 1G of space - you want to give 250MB as 'redundancy' -
> so create 4 partition....
We cannot afford to set aside 25% of read-only partition space for
redundancy on mobile devices, and would rather not impact performance
any more than dm-verity already does. With error correction we have
0.8% space overhead in our use case and no performance degradation if
the partition is not corrupted.
Sami
Dne 6.11.2015 v 21:27 Sami Tolvanen napsal(a):
> On Fri, Nov 06, 2015 at 08:20:15PM +0100, Zdenek Kabelac wrote:
>> i.e. you have 1G of space - you want to give 250MB as 'redundancy' -
>> so create 4 partition....
well data safety has it's price - user should choose what he prefers
- more games and videos or more safety...
>
> We cannot afford to set aside 25% of read-only partition space for
> redundancy on mobile devices, and would rather not impact performance
> any more than dm-verity already does. With error correction we have
> 0.8% space overhead in our use case and no performance degradation if
> the partition is not corrupted.
>
I'm probably missing here some hw knowledge here - but if you loose
a flash block of some size - then you typically get 'error' for
all bytes the sector/block.
So how do you want to correctly 'restore' missing full sectors with just
0.8% data overhead ??
Or is the device which fails to correct block returning something 'still
usable' (since e.g. SATA disk certainly not)
Zdenek
On Fri, Nov 06, 2015 at 10:05:24PM +0100, Zdenek Kabelac wrote:
> So how do you want to correctly 'restore' missing full sectors
> with just 0.8% data overhead ??
We use interleaving. Each byte in a 4k block is part of a different
Reed-Solomon block, which means an entire lost 4k data block looks like a
single error. When we also spread the bytes that form a single Reed-Solomon
block across the partition, we end up being able to correct several
megabytes of consecutive corrupted blocks with very small space overhead.
Sami
On Fri, 6 Nov 2015, Sami Tolvanen wrote:
> On Fri, Nov 06, 2015 at 12:23:29PM -0500, Mikulas Patocka wrote:
> > I'm also wondering what is this patch useful for. Disks and flash
> > controllers have their own error detection and correction
>
> I think I addressed this earlier. Some storage devices are able to
> correct bit flips, but don't have enough redundancy to correct larger
> errors. Using this patch set we can correct N MiB of consecutive
> corruption anywhere on the partition with the same amount of storage
> overhead.
So, why doesn't the patch correct I/O errors? It is more likely that the
flash controller returns an I/O error than corrupted data. Why are you
correcting corrupted data (that is unlikely) and not correcting I/O errors
that are likely?
> > Another point - if the read-only system partition is experiencing some
> > errors, than the read-write partition will probably have errors too
>
> On mobile devices, errors in read-only partitions often lead to
> bricked devices while errors in the read-write parts might only lead
> to lost cat photos. There are situations where people would prefer to
> have a working phone even if it fails to store some of their data.
The read-write partition holds compiled applications and I doubt the
smartphone would work if there were random bits flipped.
> > Do you have some real case where such error corrections
> > increase longevity of some device?
>
> Yes, there have been several cases where read-only partition errors
> have rendered a device unusable. The sheer volume of mobile devices
> means that even if a tiny fraction of them suffer from such a problem,
> it's going to affect a large number of people.
Why don't you reflash the device from bootloader? (by holding power and
volume keys simultaneously on startup and using the fastboot utility)
> > But you can take raid5 in read-only mode, put it on several partitions
> > protected with dm-verity and you get decent error correction
>
> I agree. Unfortunately, we don't currently have the luxury of using
> raid on mobile devices.
Why not, it's just a simple kernel option. If raid5 already does data
correction, there is no reason why to duplicate this work.
> Sami
Mikulas
On Fri, 6 Nov 2015, Sami Tolvanen wrote:
> On Fri, Nov 06, 2015 at 08:20:15PM +0100, Zdenek Kabelac wrote:
> > i.e. you have 1G of space - you want to give 250MB as 'redundancy' -
> > so create 4 partition....
>
> We cannot afford to set aside 25% of read-only partition space for
You can have 20 raid5 legs (with as big chunksizes as possible - so that
sequential accesses are really sequential when they hit the flash device),
and that means setting aside only 5%. Or even more legs.
Mikulas
> redundancy on mobile devices, and would rather not impact performance
> any more than dm-verity already does. With error correction we have
> 0.8% space overhead in our use case and no performance degradation if
> the partition is not corrupted.
>
> Sami
On Fri, 6 Nov 2015, Sami Tolvanen wrote:
> On Fri, Nov 06, 2015 at 10:05:24PM +0100, Zdenek Kabelac wrote:
> > So how do you want to correctly 'restore' missing full sectors
> > with just 0.8% data overhead ??
>
> We use interleaving. Each byte in a 4k block is part of a different
> Reed-Solomon block, which means an entire lost 4k data block looks like a
> single error. When we also spread the bytes that form a single Reed-Solomon
> block across the partition, we end up being able to correct several
> megabytes of consecutive corrupted blocks with very small space overhead.
>
> Sami
But the patch doesn't do any correction in case of I/O error - it just
returns the bio with I/O error.
BTW. the function verity_fec_alloc_buffers does vzalloc and GFP_KERNEL
kmalloc inside I/O handling code - that shouldn't be done - you should
preallocate the data structures when the device is loaded.
Mikulas
On 2015-11-07 10:18, Mikulas Patocka wrote:
>
>
> On Fri, 6 Nov 2015, Sami Tolvanen wrote:
>
>> On Fri, Nov 06, 2015 at 12:23:29PM -0500, Mikulas Patocka wrote:
>>> Do you have some real case where such error corrections
>>> increase longevity of some device?
>>
>> Yes, there have been several cases where read-only partition errors
>> have rendered a device unusable. The sheer volume of mobile devices
>> means that even if a tiny fraction of them suffer from such a problem,
>> it's going to affect a large number of people.
>
> Why don't you reflash the device from bootloader? (by holding power and
> volume keys simultaneously on startup and using the fastboot utility)
Requiring an end user to re-flash their device is not good business
practice, even if it is a rare occurrence (it adds a potential
attack-vector for malware, and is decidedly non-trivial to do for
someone without programming background). Phones, tablets, and other
embedded systems are the type of thing where the device needs to be
usable when the user wants to access it, period. Minimizing the chances
of the device not working (even for less common causes like this patch
tries to protect against) is crucial to making the user experience as
good as possible (and reducing customer support costs).
On Thu, Nov 05 2015 at 12:33pm -0500,
Sami Tolvanen <[email protected]> wrote:
> On Thu, Nov 05, 2015 at 08:34:04AM +0100, Milan Broz wrote:
> > could you please elaborate why is all this needed? To extend support
> > of some faulty flash chips?
>
> This makes dm-verity more robust against corruption caused by either
> hardware or software bugs, both of which we have seen in the past on
> actual devices.
>
> Note that unlike the error correction sometimes included in flash
> storage devices, this doesn't merely protect against random bit flips,
> it makes it possible to recover from several megabytes of corrupted
> or lost data.
Google (via Android and/or ChromeOS) is the primary consumer of dm-verity.
As such I'm inclined to trust Google's need for this feature and that it
has been carefully designed and implemented. But obviously we need to
verify that.
Patches 1 and 2 look fine to me (just refactoring, no functional
change). I may find something upon closer review but we'll cross that
bridge if/when we get to it.
> > Do you have some statistics that there are really such correctable errors
> > in real devices?
>
> Sorry, I don't have statistics to share at the moment.
>
> > Anyway, I really do not understand layer separation here.
>
> I should have elaborated more on this. Implementing this without
> integrity checking would not be feasible for a few reasons:
>
> 1. Being able to detect which blocks are corrupted allows us to
> avoid correcting valid blocks. Correcting errors is slow and
> this is the only way to keep performance acceptable.
>
> 2. Due to a property of erasure codes, we can correct twice as
> many errors if we know where the errors are. Using the hash
> tree to detect corrupted blocks lets us locate erasures.
>
> 3. Error correction algorithms may not produce valid output and
> without integrity checking, there's no reliable way to detect
> when we actually succeeded in correcting a block.
This all makes sense to me.
So for patch 3:
I'm left wondering: can the new error correction code be made an
optional feature that is off by default? -- so as to preserve some
isolation of this new code from the old dm-verity behaviour.
Looking at the code it isn't immediately clear to me where any of this
is _really_ optional; closest I see if verity_fec_decode() returning
-1 if (!v->fec_bufio)... might be good to add a wrapper like
verity_fec_is_enabled().
The if (v->fec_dev) {} block in verity_ctr() should probably be split
out to a new function. Similar to how
drivers/md/dm-thin.c:pool_create() will return error string via **error,
etc.
In addition the kbuild errors/warnings (reported by the kbuild test
robot) need fixing.
Also, the 2 other big questions from Mikulas need answering:
1) why aren't you actually adjustng error codes, returning success, if
dm-verity was able to trap/correct the corruption?
2) please fix the code to preallocate all required memory -- so that
verity_fec_alloc_buffers() isn't called in map. Any reason why you
couldn't collect the table's fec options and determine how much
additional memory is needed per dm_verity_io? And then just add that
to the per-bio-data?
> > Are we sure this combination does not create some unintended
> > gap in integrity checking?
>
> Yes, I'm sure. Corrupted blocks are integrity checked again after they
> are corrected to make sure only valid data is allowed to pass.
Makes sense.
> > Why the integrity check should even try to do some
> > error correction if there is an intentional integrity attack?
>
> Most corruption is not malicious and being able to recover from it
> makes the system more reliable. This doesn't make it any easier for an
> attacker to break dm-verity. That would still require finding a hash
> collision (or being able to modify the hash tree).
>
> > The second question - why are you writing another separate tool
> > for maintenance for dm-verity when there is veritysetup?
>
> Our tool for generating error correction metadata is independent from
> dm-verity. This data is also used by other software to correct errors
> during software updates, for example. If there's interest, I can help
> in adding this functionality to veritysetup.
If this error correction feature is going to go upstream we really
should see any associated userspace enablement also included in
veritysetup. Really no sense in fragmenting the utilities used to setup
a dm-verity device.
On Mon, Nov 09, 2015 at 11:37:35AM -0500, Mike Snitzer wrote:
> I'm left wondering: can the new error correction code be made an
> optional feature that is off by default? -- so as to preserve some
> isolation of this new code from the old dm-verity behaviour.
It's optional in the sense that you must specify error correction
parameters in the table to turn it on. Otherwise, verity_dec_decode
returns -1 and dm-verity handles errors as before.
> might be good to add a wrapper like verity_fec_is_enabled().
Sure. I can do this in v2 and address the other feedback and build
issues as well.
> Also, the 2 other big questions from Mikulas need answering:
> 1) why aren't you actually adjustng error codes, returning success, if
> dm-verity was able to trap/correct the corruption?
We don't see actual I/O errors very often. Most corruption we've seen
is caused by flaky hardware that doesn't return errors. However, I can
certainly change to code to attempt recovery in this case too.
> 2) please fix the code to preallocate all required memory -- so that
> verity_fec_alloc_buffers() isn't called in map.
I tried to avoid preallocating the buffers because they are relatively
large (up to 1 MiB depending on the Reed-Solomon parameters) and not
required unless we have errors to correct. I suppose there's no way to
safely do this in the middle of I/O?
> If this error correction feature is going to go upstream we really
> should see any associated userspace enablement also included in
> veritysetup.
I can look into this.
Sami
On Mon, Nov 09 2015 at 2:19pm -0500,
Sami Tolvanen <[email protected]> wrote:
> On Mon, Nov 09, 2015 at 11:37:35AM -0500, Mike Snitzer wrote:
> > I'm left wondering: can the new error correction code be made an
> > optional feature that is off by default? -- so as to preserve some
> > isolation of this new code from the old dm-verity behaviour.
>
> It's optional in the sense that you must specify error correction
> parameters in the table to turn it on. Otherwise, verity_dec_decode
> returns -1 and dm-verity handles errors as before.
>
> > might be good to add a wrapper like verity_fec_is_enabled().
>
> Sure. I can do this in v2 and address the other feedback and build
> issues as well.
Thanks.
> > Also, the 2 other big questions from Mikulas need answering:
> > 1) why aren't you actually adjustng error codes, returning success, if
> > dm-verity was able to trap/correct the corruption?
>
> We don't see actual I/O errors very often. Most corruption we've seen
> is caused by flaky hardware that doesn't return errors. However, I can
> certainly change to code to attempt recovery in this case too.
OK, might be worthwhile to simulate underlying storage errors using the
dm-flakey target underneath dm-verity just to validate your changes work
as expected.
> > 2) please fix the code to preallocate all required memory -- so that
> > verity_fec_alloc_buffers() isn't called in map.
>
> I tried to avoid preallocating the buffers because they are relatively
> large (up to 1 MiB depending on the Reed-Solomon parameters) and not
> required unless we have errors to correct. I suppose there's no way to
> safely do this in the middle of I/O?
Basically you want to be able to ensure forward progress of dm-verity
IO even in the face of no system memory being generally available.
Hopefully this doesn't seem like make-work for you. This one of the
design considerations imposed on DM targets because otherwise you run
the risk of failing IO in the face of low memory. Which results in
users experiencing failures.
1MB is quite large, especially if it is unlikely to be used. dm-cache
does cope with the need for memory in the IO path, see 'struct
prealloc', prealloc_data_structs() and other related functions in
drivers/md/dm-cache-target.c
But in the dm-cache case a worker thread is processing work and needs
memory for each work item. So it isn't the .map function that directly
needs the memory.
DM-thinp also pre-allocates memory using a mempool, e.g. see
drivers/md/dm-thin.c:bio_detain(). But again, this portion of dm-thinp
is being run from a worker thread and _not_ the .map functons.
Could you architect the FEC code such that it punts IO to a worker
thread only if errors need correcting? And have that worker thread's
additional memory allocations be backed by a mempool?
On 11/09/2015 08:19 PM, Sami Tolvanen wrote:
...
> We don't see actual I/O errors very often. Most corruption we've seen
> is caused by flaky hardware that doesn't return errors. However, I can
> certainly change to code to attempt recovery in this case too.
So if I understand it correctly, there is a simplified flash controller
that can send data with bit flips without detection?
(IOW in "real" SSD this kind of error should be detected internally
by some bad block management?)
This is why I asked about some statistics of real visible types of errors.
For this use case it makes sense to have error correction here but then
we should clearly said that it makes no sense to switch it on for "real" hw
that does internal integrity check or error correction
(but not authenticated integrity check as dm-verity).
...
>> If this error correction feature is going to go upstream we really
>> should see any associated userspace enablement also included in
>> veritysetup.
>
> I can look into this.
Yes, please, patches do not to be production ready (I can integrate
it to veritysetup upstream myself) but it would be very nice that
released veritysetup can configure all dm-verity features in the same time
the mainline kernel is marked stable.
(The same applies for dm-crypt & cryptsetup.)
Thanks,
Milan
On Mon, 9 Nov 2015, Sami Tolvanen wrote:
> > Also, the 2 other big questions from Mikulas need answering:
> > 1) why aren't you actually adjustng error codes, returning success, if
> > dm-verity was able to trap/correct the corruption?
>
> We don't see actual I/O errors very often. Most corruption we've seen
> is caused by flaky hardware that doesn't return errors. However, I can
> certainly change to code to attempt recovery in this case too.
What flash controller and chips do you use? What is the probability of I/O
error and what is the probability of silent data corruption? Is the silent
data corruption permanent or transient? What is causing the silent data
corruption (decay in flash chips?, errors on the bus?) Why can't you ask
the hardware engineers to use a controler with proper error correction?
Without these data - it looks like you first wrote the patch and then
tried to make some excuses why it should be accepted.
I'm also a little bit concerned that the patch will increase prevalence of
crapware on the market - when accepted, this kind of reasoning will
follow: "now we have error correction in the kernel, so we cut down flash
overprovisioning, save a dollar or two per device, and produce a crap that
randomly corrupts user's data on the read-write partition (because that
partition not protected by the error correction)".
Mikulas
On Wed, Nov 4, 2015 at 6:02 PM, Sami Tolvanen <[email protected]> wrote:
> Handle dm-verity salting in one place to simplify the code.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
Thanks for cleaning this up!
Reviewed-by: Kees Cook <[email protected]>
-Kees
> ---
> drivers/md/dm-verity.c | 262 +++++++++++++++++++++++++++----------------------
> 1 file changed, 147 insertions(+), 115 deletions(-)
>
> diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
> index edc624b..487cb66 100644
> --- a/drivers/md/dm-verity.c
> +++ b/drivers/md/dm-verity.c
> @@ -173,6 +173,84 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
> return block >> (level * v->hash_per_block_bits);
> }
>
> +/*
> + * Wrapper for crypto_shash_init, which handles verity salting.
> + */
> +static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc)
> +{
> + int r;
> +
> + desc->tfm = v->tfm;
> + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
> +
> + r = crypto_shash_init(desc);
> +
> + if (unlikely(r < 0)) {
> + DMERR("crypto_shash_init failed: %d", r);
> + return r;
> + }
> +
> + if (likely(v->version >= 1)) {
> + r = crypto_shash_update(desc, v->salt, v->salt_size);
> +
> + if (unlikely(r < 0)) {
> + DMERR("crypto_shash_update failed: %d", r);
> + return r;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc,
> + const u8 *data, size_t len)
> +{
> + int r = crypto_shash_update(desc, data, len);
> +
> + if (unlikely(r < 0))
> + DMERR("crypto_shash_update failed: %d", r);
> +
> + return r;
> +}
> +
> +static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
> + u8 *digest)
> +{
> + int r;
> +
> + if (unlikely(!v->version)) {
> + r = crypto_shash_update(desc, v->salt, v->salt_size);
> +
> + if (r < 0) {
> + DMERR("crypto_shash_update failed: %d", r);
> + return r;
> + }
> + }
> +
> + r = crypto_shash_final(desc, digest);
> +
> + if (unlikely(r < 0))
> + DMERR("crypto_shash_final failed: %d", r);
> +
> + return r;
> +}
> +
> +static int verity_hash(struct dm_verity *v, struct shash_desc *desc,
> + const u8 *data, size_t len, u8 *digest)
> +{
> + int r;
> +
> + r = verity_hash_init(v, desc);
> + if (unlikely(r < 0))
> + return r;
> +
> + r = verity_hash_update(v, desc, data, len);
> + if (unlikely(r < 0))
> + return r;
> +
> + return verity_hash_final(v, desc, digest);
> +}
> +
> static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
> sector_t *hash_block, unsigned *offset)
> {
> @@ -253,10 +331,10 @@ out:
> * If "skip_unverified" is false, unverified buffer is hashed and verified
> * against current value of io_want_digest(v, io).
> */
> -static int verity_verify_level(struct dm_verity_io *io, sector_t block,
> - int level, bool skip_unverified)
> +static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
> + sector_t block, int level, bool skip_unverified,
> + u8 *want_digest)
> {
> - struct dm_verity *v = io->v;
> struct dm_buffer *buf;
> struct buffer_aux *aux;
> u8 *data;
> @@ -273,75 +351,72 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block,
> aux = dm_bufio_get_aux_data(buf);
>
> if (!aux->hash_verified) {
> - struct shash_desc *desc;
> - u8 *result;
> -
> if (skip_unverified) {
> r = 1;
> goto release_ret_r;
> }
>
> - desc = io_hash_desc(v, io);
> - desc->tfm = v->tfm;
> - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
> - r = crypto_shash_init(desc);
> - if (r < 0) {
> - DMERR("crypto_shash_init failed: %d", r);
> + r = verity_hash(v, io_hash_desc(v, io),
> + data, 1 << v->hash_dev_block_bits,
> + io_real_digest(v, io));
> + if (unlikely(r < 0))
> goto release_ret_r;
> - }
> -
> - if (likely(v->version >= 1)) {
> - r = crypto_shash_update(desc, v->salt, v->salt_size);
> - if (r < 0) {
> - DMERR("crypto_shash_update failed: %d", r);
> - goto release_ret_r;
> - }
> - }
>
> - r = crypto_shash_update(desc, data, 1 << v->hash_dev_block_bits);
> - if (r < 0) {
> - DMERR("crypto_shash_update failed: %d", r);
> - goto release_ret_r;
> - }
> -
> - if (!v->version) {
> - r = crypto_shash_update(desc, v->salt, v->salt_size);
> - if (r < 0) {
> - DMERR("crypto_shash_update failed: %d", r);
> - goto release_ret_r;
> - }
> - }
> -
> - result = io_real_digest(v, io);
> - r = crypto_shash_final(desc, result);
> - if (r < 0) {
> - DMERR("crypto_shash_final failed: %d", r);
> + if (likely(memcmp(io_real_digest(v, io), want_digest,
> + v->digest_size) == 0))
> + aux->hash_verified = 1;
> + else if (verity_handle_err(v,
> + DM_VERITY_BLOCK_TYPE_METADATA,
> + hash_block)) {
> + r = -EIO;
> goto release_ret_r;
> }
> - if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
> - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA,
> - hash_block)) {
> - r = -EIO;
> - goto release_ret_r;
> - }
> - } else
> - aux->hash_verified = 1;
> }
>
> data += offset;
> -
> - memcpy(io_want_digest(v, io), data, v->digest_size);
> -
> - dm_bufio_release(buf);
> - return 0;
> + memcpy(want_digest, data, v->digest_size);
> + r = 0;
>
> release_ret_r:
> dm_bufio_release(buf);
> -
> return r;
> }
>
> /*
> + * Find a hash for a given block, write it to digest and verify the integrity
> + * of the hash tree if necessary.
> + */
> +static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
> + sector_t block, u8 *digest)
> +{
> + int i;
> + int r;
> +
> + if (likely(v->levels)) {
> + /*
> + * First, we try to get the requested hash for
> + * the current block. If the hash block itself is
> + * verified, zero is returned. If it isn't, this
> + * function returns 1 and we fall back to whole
> + * chain verification.
> + */
> + r = verity_verify_level(v, io, block, 0, true, digest);
> + if (likely(r <= 0))
> + return r;
> + }
> +
> + memcpy(digest, v->root_digest, v->digest_size);
> +
> + for (i = v->levels - 1; i >= 0; i--) {
> + r = verity_verify_level(v, io, block, i, false, digest);
> + if (unlikely(r))
> + return r;
> + }
> +
> + return 0;
> +}
> +
> +/*
> * Verify one "dm_verity_io" structure.
> */
> static int verity_verify_io(struct dm_verity_io *io)
> @@ -350,54 +425,21 @@ static int verity_verify_io(struct dm_verity_io *io)
> struct bio *bio = dm_bio_from_per_bio_data(io,
> v->ti->per_bio_data_size);
> unsigned b;
> - int i;
>
> for (b = 0; b < io->n_blocks; b++) {
> - struct shash_desc *desc;
> - u8 *result;
> int r;
> unsigned todo;
> + struct shash_desc *desc = io_hash_desc(v, io);
>
> - if (likely(v->levels)) {
> - /*
> - * First, we try to get the requested hash for
> - * the current block. If the hash block itself is
> - * verified, zero is returned. If it isn't, this
> - * function returns 0 and we fall back to whole
> - * chain verification.
> - */
> - int r = verity_verify_level(io, io->block + b, 0, true);
> - if (likely(!r))
> - goto test_block_hash;
> - if (r < 0)
> - return r;
> - }
> -
> - memcpy(io_want_digest(v, io), v->root_digest, v->digest_size);
> -
> - for (i = v->levels - 1; i >= 0; i--) {
> - int r = verity_verify_level(io, io->block + b, i, false);
> - if (unlikely(r))
> - return r;
> - }
> + r = verity_hash_for_block(v, io, io->block + b,
> + io_want_digest(v, io));
> + if (unlikely(r < 0))
> + return r;
>
> -test_block_hash:
> - desc = io_hash_desc(v, io);
> - desc->tfm = v->tfm;
> - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
> - r = crypto_shash_init(desc);
> - if (r < 0) {
> - DMERR("crypto_shash_init failed: %d", r);
> + r = verity_hash_init(v, desc);
> + if (unlikely(r < 0))
> return r;
> - }
>
> - if (likely(v->version >= 1)) {
> - r = crypto_shash_update(desc, v->salt, v->salt_size);
> - if (r < 0) {
> - DMERR("crypto_shash_update failed: %d", r);
> - return r;
> - }
> - }
> todo = 1 << v->data_dev_block_bits;
> do {
> u8 *page;
> @@ -408,37 +450,27 @@ test_block_hash:
> len = bv.bv_len;
> if (likely(len >= todo))
> len = todo;
> - r = crypto_shash_update(desc, page + bv.bv_offset, len);
> + r = verity_hash_update(v, desc, page + bv.bv_offset,
> + len);
> kunmap_atomic(page);
>
> - if (r < 0) {
> - DMERR("crypto_shash_update failed: %d", r);
> + if (unlikely(r < 0))
> return r;
> - }
>
> bio_advance_iter(bio, &io->iter, len);
> todo -= len;
> } while (todo);
>
> - if (!v->version) {
> - r = crypto_shash_update(desc, v->salt, v->salt_size);
> - if (r < 0) {
> - DMERR("crypto_shash_update failed: %d", r);
> - return r;
> - }
> - }
> -
> - result = io_real_digest(v, io);
> - r = crypto_shash_final(desc, result);
> - if (r < 0) {
> - DMERR("crypto_shash_final failed: %d", r);
> + r = verity_hash_final(v, desc, io_real_digest(v, io));
> + if (unlikely(r < 0))
> return r;
> - }
> - if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) {
> - if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
> - io->block + b))
> - return -EIO;
> - }
> +
> + if (likely(memcmp(io_real_digest(v, io),
> + io_want_digest(v, io), v->digest_size) == 0))
> + continue;
> + else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
> + io->block + b))
> + return -EIO;
> }
>
> return 0;
> --
> 2.6.0.rc2.230.g3dd15c0
>
--
Kees Cook
Chrome OS Security
On Wed, Nov 4, 2015 at 6:02 PM, Sami Tolvanen <[email protected]> wrote:
> Move optional argument parsing into a separate function to make it
> easier to add more of them without making verity_ctr even longer.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
-Kees
> ---
> drivers/md/dm-verity.c | 31 ++++++++++++++++++++++---------
> 1 file changed, 22 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
> index 487cb66..da76f77 100644
> --- a/drivers/md/dm-verity.c
> +++ b/drivers/md/dm-verity.c
> @@ -34,6 +34,8 @@
> #define DM_VERITY_OPT_LOGGING "ignore_corruption"
> #define DM_VERITY_OPT_RESTART "restart_on_corruption"
>
> +#define DM_VERITY_OPTS_MAX 1
> +
> static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
>
> module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
> @@ -725,6 +727,21 @@ static void verity_dtr(struct dm_target *ti)
> kfree(v);
> }
>
> +static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
> + const char *opt_string)
> +{
> + if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING)) {
> + v->mode = DM_VERITY_MODE_LOGGING;
> + return 0;
> + } else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART)) {
> + v->mode = DM_VERITY_MODE_RESTART;
> + return 0;
> + }
> +
> + v->ti->error = "Invalid feature arguments";
> + return -EINVAL;
> +}
> +
> /*
> * Target parameters:
> * <version> The current format is version 1.
> @@ -752,7 +769,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
> char dummy;
>
> static struct dm_arg _args[] = {
> - {0, 1, "Invalid number of feature args"},
> + {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"},
> };
>
> v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
> @@ -912,15 +929,11 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
> goto bad;
> }
>
> - if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING))
> - v->mode = DM_VERITY_MODE_LOGGING;
> - else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART))
> - v->mode = DM_VERITY_MODE_RESTART;
> - else {
> - ti->error = "Invalid feature arguments";
> - r = -EINVAL;
> + r = verity_parse_opt_args(&as, v, opt_string);
> + if (r < 0)
> goto bad;
> - }
> +
> + opt_params -= r;
> }
> }
>
> --
> 2.6.0.rc2.230.g3dd15c0
>
--
Kees Cook
Chrome OS Security
On Wed, Nov 04 2015 at 9:02P -0500,
Sami Tolvanen <[email protected]> wrote:
> Move optional argument parsing into a separate function to make it
> easier to add more of them without making verity_ctr even longer.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
I've taken this patch, for Linux 4.5, but I've applied the following
incremental patch ontop to make verity's optional argument parsing
follow establish DM patterns (the code is very similar to
dm-mpath.c:parse_features). Your follow-on patches will obviously need
to be rebased on this, but the code will be much cleaner.
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 3b09e50..2b0008f 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -723,19 +723,42 @@ static void verity_dtr(struct dm_target *ti)
kfree(v);
}
-static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
- const char *opt_string)
+static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
{
- if (!strcasecmp(opt_string, DM_VERITY_OPT_LOGGING)) {
- v->mode = DM_VERITY_MODE_LOGGING;
- return 0;
- } else if (!strcasecmp(opt_string, DM_VERITY_OPT_RESTART)) {
- v->mode = DM_VERITY_MODE_RESTART;
+ int r;
+ unsigned argc;
+ struct dm_target *ti = v->ti;
+ const char *arg_name;
+
+ static struct dm_arg _args[] = {
+ {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"},
+ };
+
+ r = dm_read_arg_group(_args, &as, &argc, &ti->error);
+ if (r)
+ return -EINVAL;
+
+ if (!argc)
return 0;
- }
- v->ti->error = "Invalid feature arguments";
- return -EINVAL;
+ do {
+ arg_name = dm_shift_arg(&as);
+ argc--;
+
+ if (!strcasecmp(arg_name, DM_VERITY_OPT_LOGGING)) {
+ v->mode = DM_VERITY_MODE_LOGGING;
+ continue;
+
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) {
+ v->mode = DM_VERITY_MODE_RESTART;
+ continue;
+ }
+
+ ti->error = "Unrecognized verity feature request";
+ return -EINVAL;
+ } while (argc && !r);
+
+ return r;
}
/*
@@ -757,17 +780,13 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
struct dm_verity *v;
struct dm_arg_set as;
const char *opt_string;
- unsigned int num, opt_params;
+ unsigned int num;
unsigned long long num_ll;
int r;
int i;
sector_t hash_position;
char dummy;
- static struct dm_arg _args[] = {
- {0, DM_VERITY_OPTS_MAX, "Invalid number of feature args"},
- };
-
v = kzalloc(sizeof(struct dm_verity), GFP_KERNEL);
if (!v) {
ti->error = "Cannot allocate verity structure";
@@ -912,25 +931,9 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
as.argc = argc;
as.argv = argv;
- r = dm_read_arg_group(_args, &as, &opt_params, &ti->error);
- if (r)
+ r = verity_parse_opt_args(&as, v);
+ if (r < 0)
goto bad;
-
- while (opt_params) {
- opt_params--;
- opt_string = dm_shift_arg(&as);
- if (!opt_string) {
- ti->error = "Not enough feature arguments";
- r = -EINVAL;
- goto bad;
- }
-
- r = verity_parse_opt_args(&as, v, opt_string);
- if (r < 0)
- goto bad;
-
- opt_params -= r;
- }
}
v->hash_per_block_bits =
On Mon, Nov 09 2015 at 2:19P -0500,
Sami Tolvanen <[email protected]> wrote:
> On Mon, Nov 09, 2015 at 11:37:35AM -0500, Mike Snitzer wrote:
> > I'm left wondering: can the new error correction code be made an
> > optional feature that is off by default? -- so as to preserve some
> > isolation of this new code from the old dm-verity behaviour.
>
> It's optional in the sense that you must specify error correction
> parameters in the table to turn it on. Otherwise, verity_dec_decode
> returns -1 and dm-verity handles errors as before.
>
> > might be good to add a wrapper like verity_fec_is_enabled().
>
> Sure. I can do this in v2 and address the other feedback and build
> issues as well.
Hi Sami,
Any progress on v2 that takes into account previous review feedback?
Note I've staged the first 2 patches in this series for Linux 4.5
inclusion, see:
https://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/log/?h=dm-4.5
Mike
On Wed, Dec 02, 2015 at 03:22:03PM -0500, Mike Snitzer wrote:
> Any progress on v2 that takes into account previous review feedback?
Yes. Sorry for the delay, the changes have gone through a couple of rounds
of internal reviews and testing first.
> Note I've staged the first 2 patches in this series for Linux 4.5
> inclusion
Great, thanks. I'll send v2 rebased on top of these today.
Sami
On Thu, Nov 12, 2015 at 01:50:04PM -0500, Mikulas Patocka wrote:
> What flash controller and chips do you use?
Considering the number of different devices running Android, I don't
have a good answer for this. I'm guessing most of them.
> Is the silent data corruption permanent or transient?
Most of the corruption we have observed is permanent, and typically
caused by a write failure rather than a read failure. A while ago we
also discovered a bug in our kernels which resulted in unexpected
modification of read-only partitions and ended up causing quite a lot
of problems. While in an ideal world this wouldn't happen, in real
life it's better to have an additional layer of protection against
issues like these.
> Why can't you ask the hardware engineers to use a controler with
> proper error correction?
The most advanced hardware error correction I've seen only handles
errors within a single sector and cannot detect all possible
corruption, let alone correct it. If you have examples of hardware
with proper error correction, I would love to take a look. Of course,
if this is even $1-2 more expensive per device than the current
hardware, chances are it's not going to make the budget cut with many
device manufacturers, whether we like it or not.
> Without these data - it looks like you first wrote the patch and
> then tried to make some excuses why it should be accepted.
I posted the patches primarily to hear your feedback, not necessarily
to get them accepted. The only goal I have is to improve the
reliability of devices using dm-verity. This solution makes it
possible to recover from a large number of corrupted blocks with
a very small storage overhead and no additional CPU overhead when
the partition is not corrupted (and thus, no additional power
consumption). I can fully understand that these may not be important
concerns in other environments where one might just as well run raid5
over multiple dm-verity devices, as you suggested.
> I'm also a little bit concerned that the patch will increase
> prevalence of crapware on the market
We are already concerned about current devices that end up with
corrupted partitions for one reason or another. When an ecosystem
consists of more than 10^9 devices, if even a small fraction of them
are returned or need to be repaired due to a dm-verity failure,
it quickly becomes very expensive and actively discourages device
manufacturers from adopting dm-verity. This is not even considering
the number of people inconvenienced by these issues.
Sami
On Thu, Nov 12, 2015 at 11:30:00AM +0100, Milan Broz wrote:
> Yes, please, patches do not to be production ready (I can integrate
> it to veritysetup upstream myself)
I'm working on this and should have a patch for veritysetup for you next
week.
Sami
Changes since v1:
- Added CONFIG_DM_VERITY_FEC and split error correction into
dm-verity-fec.[ch] to further separate the functionality from the
rest of dm-verity. Follows the same pattern as dm-uevent.
- Added missing dependencies for REED_SOLOMON to Kconfig.
- Renamed dm-verity.c to dm-verity-target.c to allow an optional
object to be added. Follows the naming convention of dm-cache and
dm-era.
- Changed the algorithm to work with one or more small buffers (~4k)
instead of a single large one. The more buffers we can allocate,
the faster it will work, but we don't have to preallocate a large
amount of memory anymore.
- Changed memory allocation to use mempools. v2 preallocates all the
memory required for each worker thread to guarantee forward
progress in case of memory pressure. The code attempts to allocate
more buffers (using GFP_NOIO) and uses them if available.
- Added graceful handling of IO errors, which are now treated as any
other corruption.
- Rebased against linux-dm/for-next.
Sami Tolvanen (2):
dm verity: add support for forward error correction
dm verity: ignore zero blocks
Documentation/device-mapper/verity.txt | 30 +
drivers/md/Kconfig | 12 +
drivers/md/Makefile | 5 +
drivers/md/dm-verity-fec.c | 836 +++++++++++++++++++++++++
drivers/md/dm-verity-fec.h | 146 +++++
drivers/md/{dm-verity.c => dm-verity-target.c} | 320 ++++++----
drivers/md/dm-verity.h | 129 ++++
7 files changed, 1343 insertions(+), 135 deletions(-)
create mode 100644 drivers/md/dm-verity-fec.c
create mode 100644 drivers/md/dm-verity-fec.h
rename drivers/md/{dm-verity.c => dm-verity-target.c} (82%)
create mode 100644 drivers/md/dm-verity.h
--
2.6.0.rc2.230.g3dd15c0
Add support for correcting corrupted blocks using Reed-Solomon.
This code uses RS(255, N) interleaved across data and hash
blocks. Each error-correcting block covers N bytes evenly
distributed across the combined total data, so that each byte is a
maximum distance away from the others. This makes it possible to
recover from several consecutive corrupted blocks with relatively
small space overhead.
In addition, using verity hashes to locate erasures nearly doubles
the effectiveness of error correction. Being able to detect
corrupted blocks also improves performance, because only corrupted
blocks need to corrected.
For a 2 GiB partition, RS(255, 253) (two parity bytes for each
253-byte block) can correct up to 16 MiB of consecutive corrupted
blocks if erasures can be located, and 8 MiB if they cannot, with
16 MiB space overhead.
Signed-off-by: Sami Tolvanen <[email protected]>
---
Documentation/device-mapper/verity.txt | 25 +
drivers/md/Kconfig | 12 +
drivers/md/Makefile | 5 +
drivers/md/dm-verity-fec.c | 830 +++++++++++++++++++++++++
drivers/md/dm-verity-fec.h | 146 +++++
drivers/md/{dm-verity.c => dm-verity-target.c} | 244 ++++----
drivers/md/dm-verity.h | 128 ++++
7 files changed, 1259 insertions(+), 131 deletions(-)
create mode 100644 drivers/md/dm-verity-fec.c
create mode 100644 drivers/md/dm-verity-fec.h
rename drivers/md/{dm-verity.c => dm-verity-target.c} (84%)
create mode 100644 drivers/md/dm-verity.h
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index e15bc1a..1058f36 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -79,6 +79,31 @@ restart_on_corruption
not compatible with ignore_corruption and requires user space support to
avoid restart loops.
+use_fec_from_device
+ Use forward error correction (FEC) to recover from corruption if hash
+ verification fails. Use encoding data from the specified device. This
+ may be the same device where data and hash blocks reside, in which case
+ fec_start must be outside data and hash areas.
+
+ If the encoding data covers additional metadata, it must be accessible
+ on the hash device after the hash blocks.
+
+ Note: block sizes for data and hash devices must match.
+
+fec_roots
+ Number of generator roots. This equals to the number of parity bytes in
+ the encoding data. For example, in RS(M, N) encoding, the number of roots
+ is M-N.
+
+fec_blocks
+ The number of encoding data blocks on the FEC device. The block size for
+ the FEC device is <data_block_size>.
+
+fec_start
+ This is the offset, in <data_block_size> blocks, from the start of the
+ FEC device to the beginning of the encoding data.
+
+
Theory of operation
===================
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1b69359..0a2e727 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -467,6 +467,18 @@ config DM_VERITY
If unsure, say N.
+config DM_VERITY_FEC
+ bool "Verity forward error correction support"
+ depends on DM_VERITY
+ select REED_SOLOMON
+ select REED_SOLOMON_DEC8
+ ---help---
+ Add forward error correction support to dm-verity. This option
+ makes it possible to use pre-generated error correction data to
+ recover from corrupted blocks.
+
+ If unsure, say N.
+
config DM_SWITCH
tristate "Switch target support (EXPERIMENTAL)"
depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index f34979c..62a6576 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -16,6 +16,7 @@ dm-cache-mq-y += dm-cache-policy-mq.o
dm-cache-smq-y += dm-cache-policy-smq.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-era-y += dm-era-target.o
+dm-verity-y += dm-verity-target.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o raid5-cache.o
@@ -63,3 +64,7 @@ obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
endif
+
+ifeq ($(CONFIG_DM_VERITY_FEC),y)
+dm-verity-objs += dm-verity-fec.o
+endif
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
new file mode 100644
index 0000000..c3a2531
--- /dev/null
+++ b/drivers/md/dm-verity-fec.c
@@ -0,0 +1,830 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Sami Tolvanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include "dm-verity-fec.h"
+
+#define DM_MSG_PREFIX "verity-fec"
+
+/*
+ * If error correction has been configured, returns true.
+ */
+bool verity_fec_is_enabled(struct dm_verity *v)
+{
+ return v->fec && v->fec->dev;
+}
+
+/*
+ * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable
+ * length fields.
+ */
+static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io)
+{
+ return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io);
+}
+
+/*
+ * Return an interleaved offset for a byte in RS block.
+ */
+static inline u64 fec_interleave(struct dm_verity *v, u64 offset)
+{
+ u32 mod;
+
+ mod = do_div(offset, v->fec->rsn);
+ return offset + mod * (v->fec->rounds << v->data_dev_block_bits);
+}
+
+/*
+ * Decode an RS block using Reed-Solomon.
+ */
+static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
+ u8 *data, u8 *fec, int neras)
+{
+ int i;
+ uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
+
+ for (i = 0; i < v->fec->roots; i++)
+ par[i] = fec[i];
+
+ return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras,
+ fio->erasures, 0, NULL);
+}
+
+/*
+ * Read error-correcting codes for the requested RS block. Returns a pointer
+ * to the data block. Caller is responsible for releasing buf.
+ */
+static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
+ unsigned *offset, struct dm_buffer **buf)
+{
+ u64 position, block;
+ u8 *res;
+
+ position = (index + rsb) * v->fec->roots;
+ block = position >> v->data_dev_block_bits;
+
+ *offset = (unsigned)(position - (block << v->data_dev_block_bits));
+
+ res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
+
+ if (unlikely(IS_ERR(res))) {
+ DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
+ v->data_dev->name, (unsigned long long)rsb,
+ (unsigned long long)(v->fec->start + block),
+ PTR_ERR(res));
+ *buf = NULL;
+ return NULL;
+ }
+
+ return res;
+}
+
+/* Loop over each preallocated buffer slot. */
+#define fec_for_each_prealloc_buffer(__i) \
+ for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++)
+
+/* Loop over each extra buffer slot. */
+#define fec_for_each_extra_buffer(io, __i) \
+ for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; \
+ __i++)
+
+/* Loop over each allocated buffer. */
+#define fec_for_each_buffer(io, __i) \
+ for (__i = 0; __i < (io)->nbufs; __i++)
+
+/* Loop over each RS block in each allocated buffer. */
+#define fec_for_each_buffer_rs_block(io, __i, __j) \
+ fec_for_each_buffer(io, __i) \
+ for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++)
+
+/*
+ * Return a pointer to the current RS block when called inside
+ * fec_for_each_buffer_rs_block.
+ */
+static inline u8 *fec_buffer_rs_block(struct dm_verity *v,
+ struct dm_verity_fec_io *fio,
+ unsigned i, unsigned j)
+{
+ return &fio->bufs[i][j * v->fec->rsn];
+}
+
+/*
+ * Return an index to the current RS block when called inside
+ * fec_for_each_buffer_rs_block.
+ */
+static inline unsigned fec_buffer_rs_index(unsigned i, unsigned j)
+{
+ return (i << DM_VERITY_FEC_BUF_RS_BITS) + j;
+}
+
+/*
+ * Decode all RS blocks from buffers and copy corrected bytes into fio->output
+ * starting from block_offset.
+ */
+static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
+ u64 rsb, int byte_index, unsigned block_offset,
+ int neras)
+{
+ int r = -1, corrected = 0, res;
+ struct dm_buffer *buf;
+ unsigned n, i, offset;
+ u8 *par, *block;
+
+ par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+ if (unlikely(!par))
+ return r;
+
+ /*
+ * Decode the RS blocks we have in bufs. Each RS block results in
+ * one corrected target byte and consumes fec->roots parity bytes.
+ */
+ fec_for_each_buffer_rs_block(fio, n, i) {
+ block = fec_buffer_rs_block(v, fio, n, i);
+ res = fec_decode_rs8(v, fio, block, &par[offset], neras);
+
+ if (res < 0)
+ goto error;
+
+ corrected += res;
+ fio->output[block_offset] = block[byte_index];
+
+ block_offset++;
+ if (block_offset >= 1 << v->data_dev_block_bits)
+ goto done;
+
+ /* read the next block when we run out of parity bytes */
+ offset += v->fec->roots;
+ if (offset >= 1 << v->data_dev_block_bits) {
+ dm_bufio_release(buf);
+
+ par = fec_read_parity(v, rsb, block_offset, &offset,
+ &buf);
+ if (unlikely(!par))
+ return r;
+ }
+ }
+
+done:
+ r = corrected;
+error:
+ dm_bufio_release(buf);
+
+ if (r < 0 && neras)
+ DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
+ v->data_dev->name, (unsigned long long)rsb, r);
+ else if (r > 0)
+ DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
+ v->data_dev->name, (unsigned long long)rsb,
+ r);
+
+ return r;
+}
+
+/*
+ * Locate data block erasures using verity hashes.
+ */
+static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *want_digest, u8 *data)
+{
+ if (unlikely(verity_hash(v, verity_io_hash_desc(v, io),
+ data, 1 << v->data_dev_block_bits,
+ verity_io_real_digest(v, io))))
+ return 0;
+
+ return memcmp(verity_io_real_digest(v, io), want_digest,
+ v->digest_size) != 0;
+}
+
+/*
+ * Read data blocks that are part of the RS block and deinterleave as much as
+ * fits into buffers. Check for erasure locations if neras is non-NULL.
+ */
+static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
+ u64 rsb, u64 target, unsigned block_offset,
+ int *neras)
+{
+ int i, j, target_index = -1;
+ struct dm_buffer *buf;
+ struct dm_bufio_client *bufio;
+ struct dm_verity_fec_io *fio = fec_io(io);
+ u64 block, ileaved;
+ u8 *bbuf, *rs_block;
+ u8 want_digest[v->digest_size];
+ unsigned n, k;
+
+ if (neras)
+ *neras = 0;
+
+ /*
+ * read each of the rsn data blocks that are part of the RS block, and
+ * interleave contents to available bufs
+ */
+ for (i = 0; i < v->fec->rsn; i++) {
+ ileaved = fec_interleave(v, rsb * v->fec->rsn + i);
+
+ /*
+ * target is the data block we want to correct, target_index is
+ * the index of this block within the rsn RS blocks
+ */
+ if (ileaved == target)
+ target_index = i;
+
+ block = ileaved >> v->data_dev_block_bits;
+ bufio = v->fec->data_bufio;
+
+ if (block >= v->data_blocks) {
+ block -= v->data_blocks;
+
+ /*
+ * blocks outside the area were assumed to contain
+ * zeros when encoding data was generated
+ */
+ if (unlikely(block >= v->fec->hash_blocks))
+ continue;
+
+ block += v->hash_start;
+ bufio = v->bufio;
+ }
+
+ bbuf = dm_bufio_read(bufio, block, &buf);
+
+ if (unlikely(IS_ERR(bbuf))) {
+ DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
+ v->data_dev->name,
+ (unsigned long long)rsb,
+ (unsigned long long)block, PTR_ERR(bbuf));
+
+ /* assume the block is corrupted */
+ if (neras && *neras <= v->fec->roots)
+ fio->erasures[(*neras)++] = i;
+
+ continue;
+ }
+
+ /* locate erasures if the block is on the data device */
+ if (bufio == v->fec->data_bufio &&
+ verity_hash_for_block(v, io, block, want_digest) == 0) {
+ /*
+ * skip if we have already found the theoretical
+ * maximum number (i.e. fec->roots) of erasures
+ */
+ if (neras && *neras <= v->fec->roots &&
+ fec_is_erasure(v, io, want_digest, bbuf))
+ fio->erasures[(*neras)++] = i;
+ }
+
+ /*
+ * deinterleave and copy the bytes that fit into bufs,
+ * starting from block_offset
+ */
+ fec_for_each_buffer_rs_block(fio, n, j) {
+ k = fec_buffer_rs_index(n, j) + block_offset;
+
+ if (k >= 1 << v->data_dev_block_bits)
+ goto done;
+
+ rs_block = fec_buffer_rs_block(v, fio, n, j);
+ rs_block[i] = bbuf[k];
+ }
+
+done:
+ dm_bufio_release(buf);
+ }
+
+ return target_index;
+}
+
+/*
+ * Allocate RS control structure and FEC buffers from preallocated mempools,
+ * and attempt to allocate as many extra buffers as available.
+ */
+static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
+{
+ unsigned n;
+
+ if (!fio->rs) {
+ fio->rs = mempool_alloc(v->fec->rs_pool, 0);
+
+ if (unlikely(!fio->rs)) {
+ DMERR("failed to allocate RS");
+ return -ENOMEM;
+ }
+ }
+
+ fec_for_each_prealloc_buffer(n) {
+ if (fio->bufs[n])
+ continue;
+
+ fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO);
+
+ if (unlikely(!fio->bufs[n])) {
+ DMERR("failed to allocate FEC buffer");
+ return -ENOMEM;
+ }
+ }
+
+ /* try to allocate the maximum number of buffers */
+ fec_for_each_extra_buffer(fio, n) {
+ if (fio->bufs[n])
+ continue;
+
+ fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO);
+
+ /* we can manage with even one buffer if necessary */
+ if (unlikely(!fio->bufs[n]))
+ break;
+ }
+
+ fio->nbufs = n;
+
+ if (!fio->output) {
+ fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO);
+
+ if (!fio->output) {
+ DMERR("failed to allocate FEC page");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Initialize buffers and clear erasures. fec_read_bufs assumes buffers are
+ * zeroed before deinterleaving.
+ */
+static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
+{
+ unsigned n;
+
+ fec_for_each_buffer(fio, n)
+ memset(fio->bufs[n], 0,
+ v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS);
+
+ memset(fio->erasures, 0, sizeof(fio->erasures));
+}
+
+/*
+ * Decode all RS blocks in a single data block and return the target block
+ * (indicated by "offset") in fio->output. If use_erasures is non-zero, uses
+ * hashes to locate erasures.
+ */
+static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
+ struct dm_verity_fec_io *fio, u64 rsb, u64 offset,
+ int use_erasures)
+{
+ int r, neras = 0;
+ unsigned pos;
+
+ r = fec_alloc_bufs(v, fio);
+ if (unlikely(r < 0))
+ return -1;
+
+ for (pos = 0; pos < 1 << v->data_dev_block_bits; ) {
+ fec_init_bufs(v, fio);
+
+ r = fec_read_bufs(v, io, rsb, offset, pos,
+ use_erasures ? &neras : NULL);
+ if (unlikely(r < 0))
+ return r;
+
+ r = fec_decode_bufs(v, fio, rsb, r, pos, neras);
+ if (r < 0)
+ return r;
+
+ pos += fio->nbufs << DM_VERITY_FEC_BUF_RS_BITS;
+ }
+
+ /* Always re-validate the corrected block against the expected hash */
+ r = verity_hash(v, verity_io_hash_desc(v, io), fio->output,
+ 1 << v->data_dev_block_bits,
+ verity_io_real_digest(v, io));
+ if (unlikely(r < 0))
+ return r;
+
+ if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io),
+ v->digest_size)) {
+ DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
+ v->data_dev->name, (unsigned long long)rsb,
+ neras);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data,
+ size_t len)
+{
+ struct dm_verity_fec_io *fio = fec_io(io);
+
+ memcpy(data, &fio->output[fio->output_pos], len);
+ fio->output_pos += len;
+
+ return 0;
+}
+
+/*
+ * Correct errors in a block. Copies corrected block to dest if non-NULL,
+ * otherwise to a bio_vec starting from iter.
+ */
+int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+ enum verity_block_type type, sector_t block, u8 *dest,
+ struct bvec_iter *iter)
+{
+ int r = -1;
+ struct dm_verity_fec_io *fio = fec_io(io);
+ u64 offset, res, rsb;
+
+ if (!verity_fec_is_enabled(v))
+ return -1;
+
+ if (type == DM_VERITY_BLOCK_TYPE_METADATA)
+ block += v->data_blocks;
+
+ /*
+ * For RS(M, N), the continuous FEC data is divided into blocks of N
+ * bytes. Since block size may not be divisible by N, the last block
+ * is zero padded when decoding.
+ *
+ * Each byte of the block is covered by a different RS(M, N) code,
+ * and each code is interleaved over N blocks to make it less likely
+ * that bursty corruption will leave us in unrecoverable state.
+ */
+
+ offset = block << v->data_dev_block_bits;
+
+ res = offset;
+ do_div(res, v->fec->rounds << v->data_dev_block_bits);
+
+ /*
+ * The base RS block we can feed to the interleaver to find out all
+ * blocks required for decoding.
+ */
+ rsb = offset - res * (v->fec->rounds << v->data_dev_block_bits);
+
+ /*
+ * Locating erasures is slow, so attempt to recover the block without
+ * them first. Do a second attempt with erasures if the corruption is
+ * bad enough.
+ */
+ r = fec_decode_rsb(v, io, fio, rsb, offset, 0);
+ if (r < 0)
+ r = fec_decode_rsb(v, io, fio, rsb, offset, 1);
+
+ if (r < 0)
+ return r;
+
+ if (dest)
+ memcpy(dest, fio->output, 1 << v->data_dev_block_bits);
+ else if (iter) {
+ fio->output_pos = 0;
+ r = verity_for_bv_block(v, io, iter, fec_bv_copy);
+ }
+
+ return r;
+}
+
+/*
+ * Clean up per-bio data.
+ */
+void verity_fec_finish_io(struct dm_verity_io *io)
+{
+ unsigned n;
+ struct dm_verity_fec *f = io->v->fec;
+ struct dm_verity_fec_io *fio = fec_io(io);
+
+ if (!verity_fec_is_enabled(io->v))
+ return;
+
+ mempool_free(fio->rs, f->rs_pool);
+
+ fec_for_each_prealloc_buffer(n)
+ mempool_free(fio->bufs[n], f->prealloc_pool);
+
+ fec_for_each_extra_buffer(fio, n)
+ mempool_free(fio->bufs[n], f->extra_pool);
+
+ mempool_free(fio->output, f->output_pool);
+}
+
+/*
+ * Initialize per-bio data.
+ */
+void verity_fec_init_io(struct dm_verity_io *io)
+{
+ struct dm_verity_fec_io *fio = fec_io(io);
+
+ if (!verity_fec_is_enabled(io->v))
+ return;
+
+ fio->rs = NULL;
+ memset(fio->bufs, 0, sizeof(fio->bufs));
+ fio->nbufs = 0;
+ fio->output = NULL;
+}
+
+/*
+ * Append feature arguments and values to the status table.
+ */
+unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
+ char *result, unsigned maxlen)
+{
+ if (!verity_fec_is_enabled(v))
+ return sz;
+
+ DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s "
+ DM_VERITY_OPT_FEC_BLOCKS " %llu "
+ DM_VERITY_OPT_FEC_START " %llu "
+ DM_VERITY_OPT_FEC_ROOTS " %d",
+ v->fec->dev->name,
+ (unsigned long long)v->fec->blocks,
+ (unsigned long long)v->fec->start,
+ v->fec->roots);
+
+ return sz;
+}
+
+void verity_fec_dtr(struct dm_verity *v)
+{
+ struct dm_verity_fec *f = v->fec;
+
+ if (!verity_fec_is_enabled(v))
+ goto out;
+
+ mempool_destroy(f->rs_pool);
+ mempool_destroy(f->prealloc_pool);
+ mempool_destroy(f->extra_pool);
+ kmem_cache_destroy(f->cache);
+
+ if (f->data_bufio)
+ dm_bufio_client_destroy(f->data_bufio);
+ if (f->bufio)
+ dm_bufio_client_destroy(f->bufio);
+
+ if (f->dev)
+ dm_put_device(v->ti, f->dev);
+
+out:
+ kfree(f);
+ v->fec = NULL;
+}
+
+static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ struct dm_verity *v = (struct dm_verity *)pool_data;
+
+ return init_rs(8, 0x11d, 0, 1, v->fec->roots);
+}
+
+static void fec_rs_free(void *element, void *pool_data)
+{
+ struct rs_control *rs = (struct rs_control *)element;
+
+ if (rs)
+ free_rs(rs);
+}
+
+int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
+ unsigned *argc, const char *arg_name)
+{
+ int r;
+ const char *arg_value;
+ unsigned long long num_ll;
+ unsigned char num_c;
+ char dummy;
+
+ /* All feature arguments require a value */
+ if (!*argc)
+ return -EINVAL;
+
+ arg_value = dm_shift_arg(as);
+ (*argc)--;
+
+ if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) {
+ r = dm_get_device(v->ti, arg_value, FMODE_READ, &v->fec->dev);
+ if (r) {
+ v->ti->error = "FEC device lookup failed";
+ return r;
+ }
+
+ return 0;
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS)) {
+ if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
+ (sector_t)(num_ll <<
+ (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+ v->ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+ return -EINVAL;
+ }
+
+ v->fec->blocks = num_ll;
+ return 0;
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START)) {
+ if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
+ (sector_t)(num_ll <<
+ (v->data_dev_block_bits - SECTOR_SHIFT))
+ >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll) {
+ v->ti->error = "Invalid " DM_VERITY_OPT_FEC_START;
+ return -EINVAL;
+ }
+
+ v->fec->start = num_ll;
+ return 0;
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)) {
+ if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 ||
+ !num_c ||
+ num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) ||
+ num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) {
+ v->ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS;
+ return -EINVAL;
+ }
+
+ v->fec->roots = num_c;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr.
+ */
+int verity_fec_ctr_alloc(struct dm_verity *v)
+{
+ struct dm_verity_fec *f;
+
+ f = kzalloc(sizeof(struct dm_verity_fec), GFP_KERNEL);
+ if (!f) {
+ v->ti->error = "Cannot allocate FEC structure";
+ return -ENOMEM;
+ }
+
+ v->fec = f;
+ return 0;
+}
+
+/*
+ * Validate arguments and preallocate memory. Must be called after arguments
+ * have been parsed using verity_fec_parse_opt_args.
+ */
+int verity_fec_ctr(struct dm_verity *v)
+{
+ struct dm_verity_fec *f = v->fec;
+ u64 hash_blocks;
+
+ if (!verity_fec_is_enabled(v)) {
+ verity_fec_dtr(v);
+ return 0;
+ }
+
+ /*
+ * FEC is computed over data blocks, possible metadata, and
+ * hash blocks. In other words, FEC covers total of fec_blocks
+ * blocks consisting of the following:
+ *
+ * data blocks | hash blocks | metadata (optional)
+ *
+ * We allow metadata after hash blocks to support a use case
+ * where all data is stored on the same device and FEC covers
+ * the entire area.
+ *
+ * If metadata is included, we require it to be available on the
+ * hash device after the hash blocks.
+ */
+
+ hash_blocks = v->hash_blocks - v->hash_start;
+
+ /*
+ * Require matching block sizes for data and hash devices for
+ * simplicity.
+ */
+ if (v->data_dev_block_bits != v->hash_dev_block_bits) {
+ v->ti->error = "Block sizes must match to use FEC";
+ return -EINVAL;
+ }
+
+ if (!f->roots) {
+ v->ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS;
+ return -EINVAL;
+ }
+
+ f->rsn = DM_VERITY_FEC_RSM - f->roots;
+
+ if (!f->blocks) {
+ v->ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS;
+ return -EINVAL;
+ }
+
+ f->rounds = f->blocks;
+
+ if (do_div(f->rounds, f->rsn))
+ f->rounds++;
+
+ /*
+ * Due to optional metadata, f->blocks can be larger than
+ * data_blocks and hash_blocks combined.
+ */
+ if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) {
+ v->ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+ return -EINVAL;
+ }
+
+ /*
+ * Metadata is accessed through the hash device, so we require
+ * it to be large enough.
+ */
+ f->hash_blocks = f->blocks - v->data_blocks;
+
+ if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) {
+ v->ti->error = "Hash device is too small for "
+ DM_VERITY_OPT_FEC_BLOCKS;
+ return -E2BIG;
+ }
+
+ f->bufio = dm_bufio_client_create(f->dev->bdev,
+ 1 << v->data_dev_block_bits,
+ 1, 0, NULL, NULL);
+
+ if (IS_ERR(f->bufio)) {
+ v->ti->error = "Cannot initialize dm-bufio";
+ return PTR_ERR(f->bufio);
+ }
+
+ if (dm_bufio_get_device_size(f->bufio) <
+ (f->start + f->rounds * f->roots)
+ >> v->data_dev_block_bits) {
+ v->ti->error = "FEC device is too small";
+ return -E2BIG;
+ }
+
+ f->data_bufio = dm_bufio_client_create(v->data_dev->bdev,
+ 1 << v->data_dev_block_bits,
+ 1, 0, NULL, NULL);
+
+ if (IS_ERR(f->data_bufio)) {
+ v->ti->error = "Cannot initialize dm-bufio";
+ return PTR_ERR(f->data_bufio);
+ }
+
+ if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) {
+ v->ti->error = "Data device is too small";
+ return -E2BIG;
+ }
+
+ /* Preallocate an rs_control structure for each worker thread */
+ f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc,
+ fec_rs_free, (void *) v);
+
+ if (!f->rs_pool) {
+ v->ti->error = "Cannot allocate RS pool";
+ return -ENOMEM;
+ }
+
+ f->cache = kmem_cache_create("dm_verity_fec_buffers",
+ f->rsn << DM_VERITY_FEC_BUF_RS_BITS,
+ 0, 0, NULL);
+
+ if (!f->cache) {
+ v->ti->error = "Cannot create FEC buffer cache";
+ return -ENOMEM;
+ }
+
+ /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
+ f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() *
+ DM_VERITY_FEC_BUF_PREALLOC,
+ f->cache);
+
+ if (!f->prealloc_pool) {
+ v->ti->error = "Cannot allocate FEC buffer prealloc pool";
+ return -ENOMEM;
+ }
+
+ f->extra_pool = mempool_create_slab_pool(0, f->cache);
+
+ if (!f->extra_pool) {
+ v->ti->error = "Cannot allocate FEC buffer extra pool";
+ return -ENOMEM;
+ }
+
+ /* Preallocate an output buffer for each thread */
+ f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(),
+ 1 << v->data_dev_block_bits);
+
+ if (!f->output_pool) {
+ v->ti->error = "Cannot allocate FEC output pool";
+ return -ENOMEM;
+ }
+
+ /* Reserve space for our per-bio data */
+ v->ti->per_bio_data_size += sizeof(struct dm_verity_fec_io);
+
+ return 0;
+}
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
new file mode 100644
index 0000000..420c97c
--- /dev/null
+++ b/drivers/md/dm-verity-fec.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Sami Tolvanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef DM_VERITY_FEC_H
+#define DM_VERITY_FEC_H
+
+#include "dm-verity.h"
+#include <linux/rslib.h>
+
+/* Reed-Solomon(M, N) parameters */
+#define DM_VERITY_FEC_RSM 255
+#define DM_VERITY_FEC_MAX_RSN 253
+#define DM_VERITY_FEC_MIN_RSN 231 /* ~10% space overhead */
+
+/* buffers for deinterleaving and decoding */
+#define DM_VERITY_FEC_BUF_PREALLOC 1 /* buffers to preallocate */
+#define DM_VERITY_FEC_BUF_RS_BITS 4 /* 1 << RS blocks per buffer */
+/* we need buffers for at most 1 << block size RS blocks */
+#define DM_VERITY_FEC_BUF_MAX \
+ (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
+
+#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device"
+#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks"
+#define DM_VERITY_OPT_FEC_START "fec_start"
+#define DM_VERITY_OPT_FEC_ROOTS "fec_roots"
+
+/* configuration */
+struct dm_verity_fec {
+ struct dm_dev *dev; /* parity data device */
+ struct dm_bufio_client *data_bufio; /* for data dev access */
+ struct dm_bufio_client *bufio; /* for parity data access */
+ sector_t start; /* parity data start in blocks */
+ sector_t blocks; /* number of blocks covered */
+ sector_t rounds; /* number of interleaving rounds */
+ sector_t hash_blocks; /* blocks covered after v->hash_start */
+ unsigned char roots; /* number of parity bytes, M-N of RS(M, N) */
+ unsigned char rsn; /* N of RS(M, N) */
+ mempool_t *rs_pool; /* mempool for fio->rs */
+ mempool_t *prealloc_pool; /* mempool for preallocated buffers */
+ mempool_t *extra_pool; /* mempool for extra buffers */
+ mempool_t *output_pool; /* mempool for output */
+ struct kmem_cache *cache; /* cache for buffers */
+};
+
+/* per-bio data */
+struct dm_verity_fec_io {
+ struct rs_control *rs; /* Reed-Solomon state */
+ int erasures[DM_VERITY_FEC_MAX_RSN]; /* erasures for decode_rs8 */
+ u8 *bufs[DM_VERITY_FEC_BUF_MAX]; /* bufs for deinterleaving */
+ unsigned nbufs; /* number of buffers allocated */
+ u8 *output; /* buffer for corrected output */
+ size_t output_pos;
+};
+
+#ifdef CONFIG_DM_VERITY_FEC
+
+/* each feature parameter requires a value */
+#define DM_VERITY_OPTS_FEC 8
+
+extern bool verity_fec_is_enabled(struct dm_verity *v);
+
+extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
+ enum verity_block_type type, sector_t block,
+ u8 *dest, struct bvec_iter *iter);
+
+extern unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
+ char *result, unsigned maxlen);
+
+extern void verity_fec_finish_io(struct dm_verity_io *io);
+extern void verity_fec_init_io(struct dm_verity_io *io);
+
+extern int verity_fec_parse_opt_args(struct dm_arg_set *as,
+ struct dm_verity *v, unsigned *argc,
+ const char *arg_name);
+
+extern void verity_fec_dtr(struct dm_verity *v);
+
+extern int verity_fec_ctr_alloc(struct dm_verity *v);
+extern int verity_fec_ctr(struct dm_verity *v);
+
+#else /* !CONFIG_DM_VERITY_FEC */
+
+#define DM_VERITY_OPTS_FEC 0
+
+static inline bool verity_fec_is_enabled(struct dm_verity *v)
+{
+ return false;
+}
+
+static inline int verity_fec_decode(struct dm_verity *v,
+ struct dm_verity_io *io,
+ enum verity_block_type type,
+ sector_t block, u8 *dest,
+ struct bvec_iter *iter)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline unsigned verity_fec_status_table(struct dm_verity *v,
+ unsigned sz, char *result,
+ unsigned maxlen)
+{
+ return sz;
+}
+
+static inline void verity_fec_finish_io(struct dm_verity_io *io)
+{
+}
+
+static inline void verity_fec_init_io(struct dm_verity_io *io)
+{
+}
+
+static inline int verity_fec_parse_opt_args(struct dm_arg_set *as,
+ struct dm_verity *v,
+ unsigned *argc,
+ const char *arg_name)
+{
+ return -EINVAL;
+}
+
+static inline void verity_fec_dtr(struct dm_verity *v)
+{
+}
+
+static inline int verity_fec_ctr_alloc(struct dm_verity *v)
+{
+ return 0;
+}
+
+static inline int verity_fec_ctr(struct dm_verity *v)
+{
+ return 0;
+}
+
+#endif /* CONFIG_DM_VERITY_FEC */
+
+#endif /* DM_VERITY_FEC_H */
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity-target.c
similarity index 84%
rename from drivers/md/dm-verity.c
rename to drivers/md/dm-verity-target.c
index b0a53c3..ca5857b 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity-target.c
@@ -14,12 +14,11 @@
* access behavior.
*/
-#include "dm-bufio.h"
+#include "dm-verity.h"
+#include "dm-verity-fec.h"
#include <linux/module.h>
-#include <linux/device-mapper.h>
#include <linux/reboot.h>
-#include <crypto/hash.h>
#define DM_MSG_PREFIX "verity"
@@ -28,84 +27,17 @@
#define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144
-#define DM_VERITY_MAX_LEVELS 63
#define DM_VERITY_MAX_CORRUPTED_ERRS 100
#define DM_VERITY_OPT_LOGGING "ignore_corruption"
#define DM_VERITY_OPT_RESTART "restart_on_corruption"
-#define DM_VERITY_OPTS_MAX 1
+#define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC)
static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR);
-enum verity_mode {
- DM_VERITY_MODE_EIO,
- DM_VERITY_MODE_LOGGING,
- DM_VERITY_MODE_RESTART
-};
-
-enum verity_block_type {
- DM_VERITY_BLOCK_TYPE_DATA,
- DM_VERITY_BLOCK_TYPE_METADATA
-};
-
-struct dm_verity {
- struct dm_dev *data_dev;
- struct dm_dev *hash_dev;
- struct dm_target *ti;
- struct dm_bufio_client *bufio;
- char *alg_name;
- struct crypto_shash *tfm;
- u8 *root_digest; /* digest of the root block */
- u8 *salt; /* salt: its size is salt_size */
- unsigned salt_size;
- sector_t data_start; /* data offset in 512-byte sectors */
- sector_t hash_start; /* hash start in blocks */
- sector_t data_blocks; /* the number of data blocks */
- sector_t hash_blocks; /* the number of hash blocks */
- unsigned char data_dev_block_bits; /* log2(data blocksize) */
- unsigned char hash_dev_block_bits; /* log2(hash blocksize) */
- unsigned char hash_per_block_bits; /* log2(hashes in hash block) */
- unsigned char levels; /* the number of tree levels */
- unsigned char version;
- unsigned digest_size; /* digest size for the current hash algorithm */
- unsigned shash_descsize;/* the size of temporary space for crypto */
- int hash_failed; /* set to 1 if hash of any block failed */
- enum verity_mode mode; /* mode for handling verification errors */
- unsigned corrupted_errs;/* Number of errors for corrupted blocks */
-
- struct workqueue_struct *verify_wq;
-
- /* starting blocks for each tree level. 0 is the lowest level. */
- sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
-};
-
-struct dm_verity_io {
- struct dm_verity *v;
-
- /* original value of bio->bi_end_io */
- bio_end_io_t *orig_bi_end_io;
-
- sector_t block;
- unsigned n_blocks;
-
- struct bvec_iter iter;
-
- struct work_struct work;
-
- /*
- * Three variably-size fields follow this struct:
- *
- * u8 hash_desc[v->shash_descsize];
- * u8 real_digest[v->digest_size];
- * u8 want_digest[v->digest_size];
- *
- * To access them use: io_hash_desc(), io_real_digest() and io_want_digest().
- */
-};
-
struct dm_verity_prefetch_work {
struct work_struct work;
struct dm_verity *v;
@@ -113,21 +45,6 @@ struct dm_verity_prefetch_work {
unsigned n_blocks;
};
-static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
-{
- return (struct shash_desc *)(io + 1);
-}
-
-static u8 *io_real_digest(struct dm_verity *v, struct dm_verity_io *io)
-{
- return (u8 *)(io + 1) + v->shash_descsize;
-}
-
-static u8 *io_want_digest(struct dm_verity *v, struct dm_verity_io *io)
-{
- return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
-}
-
/*
* Auxiliary structure appended to each dm-bufio buffer. If the value
* hash_verified is nonzero, hash of the block has been verified.
@@ -236,8 +153,8 @@ static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
return r;
}
-static int verity_hash(struct dm_verity *v, struct shash_desc *desc,
- const u8 *data, size_t len, u8 *digest)
+int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+ const u8 *data, size_t len, u8 *digest)
{
int r;
@@ -325,12 +242,12 @@ out:
* Verify hash of a metadata block pertaining to the specified data block
* ("block" argument) at a specified level ("level" argument).
*
- * On successful return, io_want_digest(v, io) contains the hash value for
- * a lower tree level or for the data block (if we're at the lowest leve).
+ * On successful return, verity_io_want_digest(v, io) contains the hash value
+ * for a lower tree level or for the data block (if we're at the lowest level).
*
* If "skip_unverified" is true, unverified buffer is skipped and 1 is returned.
* If "skip_unverified" is false, unverified buffer is hashed and verified
- * against current value of io_want_digest(v, io).
+ * against current value of verity_io_want_digest(v, io).
*/
static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, int level, bool skip_unverified,
@@ -357,15 +274,19 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
goto release_ret_r;
}
- r = verity_hash(v, io_hash_desc(v, io),
+ r = verity_hash(v, verity_io_hash_desc(v, io),
data, 1 << v->hash_dev_block_bits,
- io_real_digest(v, io));
+ verity_io_real_digest(v, io));
if (unlikely(r < 0))
goto release_ret_r;
- if (likely(memcmp(io_real_digest(v, io), want_digest,
+ if (likely(memcmp(verity_io_real_digest(v, io), want_digest,
v->digest_size) == 0))
aux->hash_verified = 1;
+ else if (verity_fec_decode(v, io,
+ DM_VERITY_BLOCK_TYPE_METADATA,
+ hash_block, data, NULL) == 0)
+ aux->hash_verified = 1;
else if (verity_handle_err(v,
DM_VERITY_BLOCK_TYPE_METADATA,
hash_block)) {
@@ -387,8 +308,8 @@ release_ret_r:
* Find a hash for a given block, write it to digest and verify the integrity
* of the hash tree if necessary.
*/
-static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
- sector_t block, u8 *digest)
+int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+ sector_t block, u8 *digest)
{
int i;
int r;
@@ -418,22 +339,65 @@ static int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
}
/*
+ * Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec
+ * starting from iter.
+ */
+int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+ struct bvec_iter *iter,
+ int (*process)(struct dm_verity *v,
+ struct dm_verity_io *io, u8 *data,
+ size_t len))
+{
+ unsigned todo = 1 << v->data_dev_block_bits;
+ struct bio *bio = dm_bio_from_per_bio_data(io,
+ v->ti->per_bio_data_size);
+
+ do {
+ int r;
+ u8 *page;
+ unsigned len;
+ struct bio_vec bv = bio_iter_iovec(bio, *iter);
+
+ page = kmap_atomic(bv.bv_page);
+ len = bv.bv_len;
+
+ if (likely(len >= todo))
+ len = todo;
+
+ r = process(v, io, page + bv.bv_offset, len);
+ kunmap_atomic(page);
+
+ if (r < 0)
+ return r;
+
+ bio_advance_iter(bio, iter, len);
+ todo -= len;
+ } while (todo);
+
+ return 0;
+}
+
+static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+{
+ return verity_hash_update(v, verity_io_hash_desc(v, io), data, len);
+}
+
+/*
* Verify one "dm_verity_io" structure.
*/
static int verity_verify_io(struct dm_verity_io *io)
{
struct dm_verity *v = io->v;
- struct bio *bio = dm_bio_from_per_bio_data(io,
- v->ti->per_bio_data_size);
+ struct bvec_iter start;
unsigned b;
for (b = 0; b < io->n_blocks; b++) {
int r;
- unsigned todo;
- struct shash_desc *desc = io_hash_desc(v, io);
+ struct shash_desc *desc = verity_io_hash_desc(v, io);
r = verity_hash_for_block(v, io, io->block + b,
- io_want_digest(v, io));
+ verity_io_want_digest(v, io));
if (unlikely(r < 0))
return r;
@@ -441,36 +405,25 @@ static int verity_verify_io(struct dm_verity_io *io)
if (unlikely(r < 0))
return r;
- todo = 1 << v->data_dev_block_bits;
- do {
- u8 *page;
- unsigned len;
- struct bio_vec bv = bio_iter_iovec(bio, io->iter);
-
- page = kmap_atomic(bv.bv_page);
- len = bv.bv_len;
- if (likely(len >= todo))
- len = todo;
- r = verity_hash_update(v, desc, page + bv.bv_offset,
- len);
- kunmap_atomic(page);
-
- if (unlikely(r < 0))
- return r;
-
- bio_advance_iter(bio, &io->iter, len);
- todo -= len;
- } while (todo);
+ start = io->iter;
+ r = verity_for_bv_block(v, io, &io->iter,
+ verity_bv_hash_update);
+ if (unlikely(r < 0))
+ return r;
- r = verity_hash_final(v, desc, io_real_digest(v, io));
+ r = verity_hash_final(v, desc, verity_io_real_digest(v, io));
if (unlikely(r < 0))
return r;
- if (likely(memcmp(io_real_digest(v, io),
- io_want_digest(v, io), v->digest_size) == 0))
+ if (likely(memcmp(verity_io_real_digest(v, io),
+ verity_io_want_digest(v, io),
+ v->digest_size) == 0))
+ continue;
+ else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA,
+ io->block + b, NULL, &start) == 0)
continue;
else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA,
- io->block + b))
+ io->block + b))
return -EIO;
}
@@ -488,6 +441,8 @@ static void verity_finish_io(struct dm_verity_io *io, int error)
bio->bi_end_io = io->orig_bi_end_io;
bio->bi_error = error;
+ verity_fec_finish_io(io);
+
bio_endio(bio);
}
@@ -502,7 +457,7 @@ static void verity_end_io(struct bio *bio)
{
struct dm_verity_io *io = bio->bi_private;
- if (bio->bi_error) {
+ if (bio->bi_error && !verity_fec_is_enabled(io->v)) {
verity_finish_io(io, bio->bi_error);
return;
}
@@ -605,6 +560,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
bio->bi_private = io;
io->iter = bio->bi_iter;
+ verity_fec_init_io(io);
+
verity_submit_prefetch(v, io);
generic_make_request(bio);
@@ -619,6 +576,7 @@ static void verity_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
struct dm_verity *v = ti->private;
+ unsigned args = 0;
unsigned sz = 0;
unsigned x;
@@ -645,8 +603,15 @@ static void verity_status(struct dm_target *ti, status_type_t type,
else
for (x = 0; x < v->salt_size; x++)
DMEMIT("%02x", v->salt[x]);
+ if (v->mode != DM_VERITY_MODE_EIO)
+ args++;
+ if (verity_fec_is_enabled(v))
+ args += DM_VERITY_OPTS_FEC;
+ if (!args)
+ return;
+ DMEMIT(" %u", args);
if (v->mode != DM_VERITY_MODE_EIO) {
- DMEMIT(" 1 ");
+ DMEMIT(" ");
switch (v->mode) {
case DM_VERITY_MODE_LOGGING:
DMEMIT(DM_VERITY_OPT_LOGGING);
@@ -658,6 +623,7 @@ static void verity_status(struct dm_target *ti, status_type_t type,
BUG();
}
}
+ sz = verity_fec_status_table(v, sz, result, maxlen);
break;
}
}
@@ -720,6 +686,8 @@ static void verity_dtr(struct dm_target *ti)
if (v->data_dev)
dm_put_device(ti, v->data_dev);
+ verity_fec_dtr(v);
+
kfree(v);
}
@@ -752,10 +720,12 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
} else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) {
v->mode = DM_VERITY_MODE_RESTART;
continue;
+ } else {
+ r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
}
- ti->error = "Unrecognized verity feature request";
- return -EINVAL;
+ if (r && !ti->error)
+ ti->error = "Unrecognized verity feature request";
} while (argc && !r);
return r;
@@ -794,6 +764,10 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->private = v;
v->ti = ti;
+ r = verity_fec_ctr_alloc(v);
+ if (r)
+ goto bad;
+
if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) {
ti->error = "Device must be readonly";
r = -EINVAL;
@@ -982,8 +956,6 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
- ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io));
-
/* WQ_UNBOUND greatly improves performance when running on ramdisk */
v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus());
if (!v->verify_wq) {
@@ -992,6 +964,16 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
+ ti->per_bio_data_size = sizeof(struct dm_verity_io) +
+ v->shash_descsize + v->digest_size * 2;
+
+ r = verity_fec_ctr(v);
+ if (r)
+ goto bad;
+
+ ti->per_bio_data_size = roundup(ti->per_bio_data_size,
+ __alignof__(struct dm_verity_io));
+
return 0;
bad:
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
new file mode 100644
index 0000000..8e85372
--- /dev/null
+++ b/drivers/md/dm-verity.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ * Copyright (C) 2015 Google, Inc.
+ *
+ * Author: Mikulas Patocka <[email protected]>
+ *
+ * Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef DM_VERITY_H
+#define DM_VERITY_H
+
+#include "dm-bufio.h"
+#include <linux/device-mapper.h>
+#include <crypto/hash.h>
+
+#define DM_VERITY_MAX_LEVELS 63
+
+enum verity_mode {
+ DM_VERITY_MODE_EIO,
+ DM_VERITY_MODE_LOGGING,
+ DM_VERITY_MODE_RESTART
+};
+
+enum verity_block_type {
+ DM_VERITY_BLOCK_TYPE_DATA,
+ DM_VERITY_BLOCK_TYPE_METADATA
+};
+
+struct dm_verity_fec;
+
+struct dm_verity {
+ struct dm_dev *data_dev;
+ struct dm_dev *hash_dev;
+ struct dm_target *ti;
+ struct dm_bufio_client *bufio;
+ char *alg_name;
+ struct crypto_shash *tfm;
+ u8 *root_digest; /* digest of the root block */
+ u8 *salt; /* salt: its size is salt_size */
+ unsigned salt_size;
+ sector_t data_start; /* data offset in 512-byte sectors */
+ sector_t hash_start; /* hash start in blocks */
+ sector_t data_blocks; /* the number of data blocks */
+ sector_t hash_blocks; /* the number of hash blocks */
+ unsigned char data_dev_block_bits; /* log2(data blocksize) */
+ unsigned char hash_dev_block_bits; /* log2(hash blocksize) */
+ unsigned char hash_per_block_bits; /* log2(hashes in hash block) */
+ unsigned char levels; /* the number of tree levels */
+ unsigned char version;
+ unsigned digest_size; /* digest size for the current hash algorithm */
+ unsigned shash_descsize;/* the size of temporary space for crypto */
+ int hash_failed; /* set to 1 if hash of any block failed */
+ enum verity_mode mode; /* mode for handling verification errors */
+ unsigned corrupted_errs;/* Number of errors for corrupted blocks */
+
+ struct workqueue_struct *verify_wq;
+
+ /* starting blocks for each tree level. 0 is the lowest level. */
+ sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
+
+ struct dm_verity_fec *fec; /* forward error correction */
+};
+
+struct dm_verity_io {
+ struct dm_verity *v;
+
+ /* original value of bio->bi_end_io */
+ bio_end_io_t *orig_bi_end_io;
+
+ sector_t block;
+ unsigned n_blocks;
+
+ struct bvec_iter iter;
+
+ struct work_struct work;
+
+ /*
+ * Three variably-size fields follow this struct:
+ *
+ * u8 hash_desc[v->shash_descsize];
+ * u8 real_digest[v->digest_size];
+ * u8 want_digest[v->digest_size];
+ *
+ * To access them use: verity_io_hash_desc(), verity_io_real_digest()
+ * and verity_io_want_digest().
+ */
+};
+
+static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ return (struct shash_desc *)(io + 1);
+}
+
+static inline u8 *verity_io_real_digest(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ return (u8 *)(io + 1) + v->shash_descsize;
+}
+
+static inline u8 *verity_io_want_digest(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
+}
+
+static inline u8 *verity_io_digest_end(struct dm_verity *v,
+ struct dm_verity_io *io)
+{
+ return verity_io_want_digest(v, io) + v->digest_size;
+}
+
+extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
+ struct bvec_iter *iter,
+ int (*process)(struct dm_verity *v,
+ struct dm_verity_io *io,
+ u8 *data, size_t len));
+
+extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
+ const u8 *data, size_t len, u8 *digest);
+
+extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
+ sector_t block, u8 *digest);
+
+#endif /* DM_VERITY_H */
--
2.6.0.rc2.230.g3dd15c0
Add ignore_zero_blocks option, which returns zeros for blocks matching a
zero hash without validating the content.
Signed-off-by: Sami Tolvanen <[email protected]>
---
Documentation/device-mapper/verity.txt | 5 ++
drivers/md/dm-verity-fec.c | 8 +++-
drivers/md/dm-verity-target.c | 84 ++++++++++++++++++++++++++++++----
drivers/md/dm-verity.h | 3 +-
4 files changed, 90 insertions(+), 10 deletions(-)
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index 1058f36..4d3bbda 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -79,6 +79,11 @@ restart_on_corruption
not compatible with ignore_corruption and requires user space support to
avoid restart loops.
+ignore_zero_blocks
+ Do not verify blocks that are expected to contain zeros and always return
+ zeros instead. This may be useful if the partition contains unused blocks
+ that are not guaranteed to contain zeros.
+
use_fec_from_device
Use forward error correction (FEC) to recover from corruption if hash
verification fails. Use encoding data from the specified device. This
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index c3a2531..599334f 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -210,6 +210,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
u64 rsb, u64 target, unsigned block_offset,
int *neras)
{
+ bool is_zero;
int i, j, target_index = -1;
struct dm_buffer *buf;
struct dm_bufio_client *bufio;
@@ -270,7 +271,12 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
/* locate erasures if the block is on the data device */
if (bufio == v->fec->data_bufio &&
- verity_hash_for_block(v, io, block, want_digest) == 0) {
+ verity_hash_for_block(v, io, block, want_digest,
+ &is_zero) == 0) {
+ /* skip known zero blocks entirely */
+ if (is_zero)
+ continue;
+
/*
* skip if we have already found the theoretical
* maximum number (i.e. fec->roots) of erasures
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index ca5857b..a2bb3bc 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -31,8 +31,9 @@
#define DM_VERITY_OPT_LOGGING "ignore_corruption"
#define DM_VERITY_OPT_RESTART "restart_on_corruption"
+#define DM_VERITY_OPT_IGN_ZEROS "ignore_zero_blocks"
-#define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC)
+#define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC)
static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
@@ -309,10 +310,9 @@ release_ret_r:
* of the hash tree if necessary.
*/
int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
- sector_t block, u8 *digest)
+ sector_t block, u8 *digest, bool *is_zero)
{
- int i;
- int r;
+ int r = 0, i;
if (likely(v->levels)) {
/*
@@ -324,7 +324,7 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
*/
r = verity_verify_level(v, io, block, 0, true, digest);
if (likely(r <= 0))
- return r;
+ goto out;
}
memcpy(digest, v->root_digest, v->digest_size);
@@ -332,10 +332,16 @@ int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
for (i = v->levels - 1; i >= 0; i--) {
r = verity_verify_level(v, io, block, i, false, digest);
if (unlikely(r))
- return r;
+ goto out;
}
- return 0;
+out:
+ if (!r && v->zero_digest)
+ *is_zero = !memcmp(v->zero_digest, digest, v->digest_size);
+ else
+ *is_zero = false;
+
+ return r;
}
/*
@@ -383,11 +389,19 @@ static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
return verity_hash_update(v, verity_io_hash_desc(v, io), data, len);
}
+static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
+ u8 *data, size_t len)
+{
+ memset(data, 0, len);
+ return 0;
+}
+
/*
* Verify one "dm_verity_io" structure.
*/
static int verity_verify_io(struct dm_verity_io *io)
{
+ bool is_zero;
struct dm_verity *v = io->v;
struct bvec_iter start;
unsigned b;
@@ -397,10 +411,24 @@ static int verity_verify_io(struct dm_verity_io *io)
struct shash_desc *desc = verity_io_hash_desc(v, io);
r = verity_hash_for_block(v, io, io->block + b,
- verity_io_want_digest(v, io));
+ verity_io_want_digest(v, io),
+ &is_zero);
if (unlikely(r < 0))
return r;
+ if (is_zero) {
+ /*
+ * If we expect a zero block, don't validate, just
+ * return zeros.
+ */
+ r = verity_for_bv_block(v, io, &io->iter,
+ verity_bv_zero);
+ if (unlikely(r < 0))
+ return r;
+
+ continue;
+ }
+
r = verity_hash_init(v, desc);
if (unlikely(r < 0))
return r;
@@ -607,6 +635,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
args++;
if (verity_fec_is_enabled(v))
args += DM_VERITY_OPTS_FEC;
+ if (v->zero_digest)
+ args++;
if (!args)
return;
DMEMIT(" %u", args);
@@ -623,6 +653,8 @@ static void verity_status(struct dm_target *ti, status_type_t type,
BUG();
}
}
+ if (v->zero_digest)
+ DMEMIT(" " DM_VERITY_OPT_IGN_ZEROS);
sz = verity_fec_status_table(v, sz, result, maxlen);
break;
}
@@ -674,6 +706,7 @@ static void verity_dtr(struct dm_target *ti)
kfree(v->salt);
kfree(v->root_digest);
+ kfree(v->zero_digest);
if (v->tfm)
crypto_free_shash(v->tfm);
@@ -691,6 +724,37 @@ static void verity_dtr(struct dm_target *ti)
kfree(v);
}
+static int verity_alloc_zero_digest(struct dm_verity *v)
+{
+ int r = -ENOMEM;
+ struct shash_desc *desc;
+ u8 *zero_data;
+
+ v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
+
+ if (!v->zero_digest)
+ return r;
+
+ desc = kmalloc(v->shash_descsize, GFP_KERNEL);
+
+ if (!desc)
+ return r; /* verity_dtr will free zero_digest */
+
+ zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
+
+ if (!zero_data)
+ goto out;
+
+ r = verity_hash(v, desc, zero_data, 1 << v->data_dev_block_bits,
+ v->zero_digest);
+
+out:
+ kfree(desc);
+ kfree(zero_data);
+
+ return r;
+}
+
static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
{
int r;
@@ -720,6 +784,10 @@ static int verity_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v)
} else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) {
v->mode = DM_VERITY_MODE_RESTART;
continue;
+ } else if (!strcasecmp(arg_name, DM_VERITY_OPT_IGN_ZEROS)) {
+ r = verity_alloc_zero_digest(v);
+ if (r)
+ ti->error = "Cannot allocate zero digest";
} else {
r = verity_fec_parse_opt_args(as, v, &argc, arg_name);
}
diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h
index 8e85372..fb419f4 100644
--- a/drivers/md/dm-verity.h
+++ b/drivers/md/dm-verity.h
@@ -40,6 +40,7 @@ struct dm_verity {
struct crypto_shash *tfm;
u8 *root_digest; /* digest of the root block */
u8 *salt; /* salt: its size is salt_size */
+ u8 *zero_digest; /* digest for a zero block */
unsigned salt_size;
sector_t data_start; /* data offset in 512-byte sectors */
sector_t hash_start; /* hash start in blocks */
@@ -123,6 +124,6 @@ extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
const u8 *data, size_t len, u8 *digest);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
- sector_t block, u8 *digest);
+ sector_t block, u8 *digest, bool *is_zero);
#endif /* DM_VERITY_H */
--
2.6.0.rc2.230.g3dd15c0
On Thu, Dec 03 2015 at 9:26am -0500,
Sami Tolvanen <[email protected]> wrote:
> Changes since v1:
>
> - Added CONFIG_DM_VERITY_FEC and split error correction into
> dm-verity-fec.[ch] to further separate the functionality from the
> rest of dm-verity. Follows the same pattern as dm-uevent.
>
> - Added missing dependencies for REED_SOLOMON to Kconfig.
>
> - Renamed dm-verity.c to dm-verity-target.c to allow an optional
> object to be added. Follows the naming convention of dm-cache and
> dm-era.
>
> - Changed the algorithm to work with one or more small buffers (~4k)
> instead of a single large one. The more buffers we can allocate,
> the faster it will work, but we don't have to preallocate a large
> amount of memory anymore.
>
> - Changed memory allocation to use mempools. v2 preallocates all the
> memory required for each worker thread to guarantee forward
> progress in case of memory pressure. The code attempts to allocate
> more buffers (using GFP_NOIO) and uses them if available.
>
> - Added graceful handling of IO errors, which are now treated as any
> other corruption.
>
> - Rebased against linux-dm/for-next.
Thanks a lot for these advances, at a high-level it sounds like you've
handled the issues raised as part of v1 review very well.
I'll review closer now. Goal is to get these changes staged in
linux-next for upstream inclusion during the 4.5 merge window.
On Thu, Dec 03 2015 at 2:54pm -0500,
Mike Snitzer <[email protected]> wrote:
> On Thu, Dec 03 2015 at 9:26am -0500,
> Sami Tolvanen <[email protected]> wrote:
>
> > Changes since v1:
> >
> > - Added CONFIG_DM_VERITY_FEC and split error correction into
> > dm-verity-fec.[ch] to further separate the functionality from the
> > rest of dm-verity. Follows the same pattern as dm-uevent.
> >
> > - Added missing dependencies for REED_SOLOMON to Kconfig.
> >
> > - Renamed dm-verity.c to dm-verity-target.c to allow an optional
> > object to be added. Follows the naming convention of dm-cache and
> > dm-era.
> >
> > - Changed the algorithm to work with one or more small buffers (~4k)
> > instead of a single large one. The more buffers we can allocate,
> > the faster it will work, but we don't have to preallocate a large
> > amount of memory anymore.
> >
> > - Changed memory allocation to use mempools. v2 preallocates all the
> > memory required for each worker thread to guarantee forward
> > progress in case of memory pressure. The code attempts to allocate
> > more buffers (using GFP_NOIO) and uses them if available.
> >
> > - Added graceful handling of IO errors, which are now treated as any
> > other corruption.
> >
> > - Rebased against linux-dm/for-next.
>
> Thanks a lot for these advances, at a high-level it sounds like you've
> handled the issues raised as part of v1 review very well.
>
> I'll review closer now. Goal is to get these changes staged in
> linux-next for upstream inclusion during the 4.5 merge window.
I took a first pass through your code and pushed the result to this
temporary branch here:
http://git.kernel.org/cgit/linux/kernel/git/snitzer/linux.git/log/?h=dm-verity-fec
As you'll see, I focused primarily on splitting the dm-verity core
changes out from the FEC and ignore_zero_blocks changes.
Along the way I tweaked some style-nits here or there but the biggest
difference between your v2 and what I pushed is captured in this commit:
http://git.kernel.org/cgit/linux/kernel/git/snitzer/linux.git/commit/?h=dm-verity-fec&id=cc1339fae225d8cdc8fffeff30f0a9981367d6c0
If you're OK with those changes I'll fold that commit into your main FEC
commit.
I'll carry on with reviewing your new code (outside of dm-verity core)
tomorrow.
Mike
On Thu, Dec 03, 2015 at 06:05:38PM -0500, Mike Snitzer wrote:
> If you're OK with those changes I'll fold that commit into your main FEC
> commit.
Yes, these changes look fine. Thanks!
Sami
On Fri, Dec 04 2015 at 5:03P -0500,
Sami Tolvanen <[email protected]> wrote:
> On Thu, Dec 03, 2015 at 06:05:38PM -0500, Mike Snitzer wrote:
> > If you're OK with those changes I'll fold that commit into your main FEC
> > commit.
>
> Yes, these changes look fine. Thanks!
OK, I reviewed the FEC code and have staged it in linux-next with the
following fixes/tweaks/nits folded in. Please let me know if you see
any problems with this (I'm obviously not an 80-columns zealot).
I'm going to carry on reviewing aspects of memory allocations, table
loads via ctr, etc.. but the code I've staged in linux-dm.git's
'for-next' is worth handing off to linux-next to see if it catches
anything, see:
https://git.kernel.org/cgit/linux/kernel/git/device-mapper/linux-dm.git/log/?h=for-next
I'd really appreciate it if you could do some regression testing, etc on
your end to verify I didn't break anything while tweaking things.
Thanks!
From: Mike Snitzer <[email protected]>
Date: Fri, 4 Dec 2015 15:49:58 -0500
Subject: [PATCH] dm verity fec: whitespace fixes and other small nits
---
drivers/md/dm-verity-fec.c | 153 +++++++++++++++++++--------------------------
1 file changed, 65 insertions(+), 88 deletions(-)
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index a40ac01..e722ce5 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -69,18 +69,15 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
position = (index + rsb) * v->fec->roots;
block = position >> v->data_dev_block_bits;
-
*offset = (unsigned)(position - (block << v->data_dev_block_bits));
res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
-
if (unlikely(IS_ERR(res))) {
DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
v->data_dev->name, (unsigned long long)rsb,
(unsigned long long)(v->fec->start + block),
PTR_ERR(res));
*buf = NULL;
- return NULL;
}
return res;
@@ -92,8 +89,7 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
/* Loop over each extra buffer slot. */
#define fec_for_each_extra_buffer(io, __i) \
- for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; \
- __i++)
+ for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++)
/* Loop over each allocated buffer. */
#define fec_for_each_buffer(io, __i) \
@@ -132,14 +128,14 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
u64 rsb, int byte_index, unsigned block_offset,
int neras)
{
- int r = -1, corrected = 0, res;
+ int r, corrected = 0, res;
struct dm_buffer *buf;
unsigned n, i, offset;
u8 *par, *block;
par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
- if (unlikely(!par))
- return r;
+ if (IS_ERR(par))
+ return PTR_ERR(par);
/*
* Decode the RS blocks we have in bufs. Each RS block results in
@@ -148,9 +144,12 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
fec_for_each_buffer_rs_block(fio, n, i) {
block = fec_buffer_rs_block(v, fio, n, i);
res = fec_decode_rs8(v, fio, block, &par[offset], neras);
+ if (res < 0) {
+ dm_bufio_release(buf);
- if (res < 0)
+ r = res;
goto error;
+ }
corrected += res;
fio->output[block_offset] = block[byte_index];
@@ -164,25 +163,20 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
if (offset >= 1 << v->data_dev_block_bits) {
dm_bufio_release(buf);
- par = fec_read_parity(v, rsb, block_offset, &offset,
- &buf);
- if (unlikely(!par))
- return r;
+ par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
+ if (unlikely(IS_ERR(par)))
+ return PTR_ERR(par);
}
}
-
done:
r = corrected;
error:
- dm_bufio_release(buf);
-
if (r < 0 && neras)
DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
v->data_dev->name, (unsigned long long)rsb, r);
else if (r > 0)
DMWARN_LIMIT("%s: FEC %llu: corrected %d errors",
- v->data_dev->name, (unsigned long long)rsb,
- r);
+ v->data_dev->name, (unsigned long long)rsb, r);
return r;
}
@@ -204,7 +198,7 @@ static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
/*
* Read data blocks that are part of the RS block and deinterleave as much as
- * fits into buffers. Check for erasure locations if neras is non-NULL.
+ * fits into buffers. Check for erasure locations if @neras is non-NULL.
*/
static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
u64 rsb, u64 target, unsigned block_offset,
@@ -299,7 +293,6 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
rs_block = fec_buffer_rs_block(v, fio, n, j);
rs_block[i] = bbuf[k];
}
-
done:
dm_bufio_release(buf);
}
@@ -317,7 +310,6 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
if (!fio->rs) {
fio->rs = mempool_alloc(v->fec->rs_pool, 0);
-
if (unlikely(!fio->rs)) {
DMERR("failed to allocate RS");
return -ENOMEM;
@@ -329,7 +321,6 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
continue;
fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO);
-
if (unlikely(!fio->bufs[n])) {
DMERR("failed to allocate FEC buffer");
return -ENOMEM;
@@ -342,12 +333,10 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
continue;
fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO);
-
/* we can manage with even one buffer if necessary */
if (unlikely(!fio->bufs[n]))
break;
}
-
fio->nbufs = n;
if (!fio->output) {
@@ -363,7 +352,7 @@ static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
}
/*
- * Initialize buffers and clear erasures. fec_read_bufs assumes buffers are
+ * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are
* zeroed before deinterleaving.
*/
static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
@@ -371,27 +360,26 @@ static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio)
unsigned n;
fec_for_each_buffer(fio, n)
- memset(fio->bufs[n], 0,
- v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS);
+ memset(fio->bufs[n], 0, v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS);
memset(fio->erasures, 0, sizeof(fio->erasures));
}
/*
* Decode all RS blocks in a single data block and return the target block
- * (indicated by "offset") in fio->output. If use_erasures is non-zero, uses
+ * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses
* hashes to locate erasures.
*/
static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
struct dm_verity_fec_io *fio, u64 rsb, u64 offset,
- int use_erasures)
+ bool use_erasures)
{
int r, neras = 0;
unsigned pos;
r = fec_alloc_bufs(v, fio);
if (unlikely(r < 0))
- return -1;
+ return r;
for (pos = 0; pos < 1 << v->data_dev_block_bits; ) {
fec_init_bufs(v, fio);
@@ -418,9 +406,8 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io),
v->digest_size)) {
DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)",
- v->data_dev->name, (unsigned long long)rsb,
- neras);
- return -1;
+ v->data_dev->name, (unsigned long long)rsb, neras);
+ return -EILSEQ;
}
return 0;
@@ -445,12 +432,12 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
enum verity_block_type type, sector_t block, u8 *dest,
struct bvec_iter *iter)
{
- int r = -1;
+ int r;
struct dm_verity_fec_io *fio = fec_io(io);
u64 offset, res, rsb;
if (!verity_fec_is_enabled(v))
- return -1;
+ return -EOPNOTSUPP;
if (type == DM_VERITY_BLOCK_TYPE_METADATA)
block += v->data_blocks;
@@ -481,12 +468,12 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
* them first. Do a second attempt with erasures if the corruption is
* bad enough.
*/
- r = fec_decode_rsb(v, io, fio, rsb, offset, 0);
- if (r < 0)
- r = fec_decode_rsb(v, io, fio, rsb, offset, 1);
-
- if (r < 0)
- return r;
+ r = fec_decode_rsb(v, io, fio, rsb, offset, false);
+ if (r < 0) {
+ r = fec_decode_rsb(v, io, fio, rsb, offset, true);
+ if (r < 0)
+ return r;
+ }
if (dest)
memcpy(dest, fio->output, 1 << v->data_dev_block_bits);
@@ -577,7 +564,6 @@ void verity_fec_dtr(struct dm_verity *v)
if (f->dev)
dm_put_device(v->ti, f->dev);
-
out:
kfree(f);
v->fec = NULL;
@@ -610,13 +596,14 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
unsigned *argc, const char *arg_name)
{
int r;
+ struct dm_target *ti = v->ti;
const char *arg_value;
unsigned long long num_ll;
unsigned char num_c;
char dummy;
if (!*argc) {
- v->ti->error = "FEC feature arguments require a value";
+ ti->error = "FEC feature arguments require a value";
return -EINVAL;
}
@@ -624,9 +611,9 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
(*argc)--;
if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) {
- r = dm_get_device(v->ti, arg_value, FMODE_READ, &v->fec->dev);
+ r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev);
if (r) {
- v->ti->error = "FEC device lookup failed";
+ ti->error = "FEC device lookup failed";
return r;
}
@@ -634,7 +621,7 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT))
>> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) {
- v->ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
return -EINVAL;
}
v->fec->blocks = num_ll;
@@ -643,7 +630,7 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 ||
((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) >>
(v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) {
- v->ti->error = "Invalid " DM_VERITY_OPT_FEC_START;
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_START;
return -EINVAL;
}
v->fec->start = num_ll;
@@ -652,13 +639,13 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 || !num_c ||
num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) ||
num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) {
- v->ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS;
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS;
return -EINVAL;
}
v->fec->roots = num_c;
} else {
- v->ti->error = "Unrecognized verity FEC feature request";
+ ti->error = "Unrecognized verity FEC feature request";
return -EINVAL;
}
@@ -677,8 +664,8 @@ int verity_fec_ctr_alloc(struct dm_verity *v)
v->ti->error = "Cannot allocate FEC structure";
return -ENOMEM;
}
-
v->fec = f;
+
return 0;
}
@@ -689,6 +676,7 @@ int verity_fec_ctr_alloc(struct dm_verity *v)
int verity_fec_ctr(struct dm_verity *v)
{
struct dm_verity_fec *f = v->fec;
+ struct dm_target *ti = v->ti;
u64 hash_blocks;
if (!verity_fec_is_enabled(v)) {
@@ -718,24 +706,22 @@ int verity_fec_ctr(struct dm_verity *v)
* simplicity.
*/
if (v->data_dev_block_bits != v->hash_dev_block_bits) {
- v->ti->error = "Block sizes must match to use FEC";
+ ti->error = "Block sizes must match to use FEC";
return -EINVAL;
}
if (!f->roots) {
- v->ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS;
+ ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS;
return -EINVAL;
}
-
f->rsn = DM_VERITY_FEC_RSM - f->roots;
if (!f->blocks) {
- v->ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS;
+ ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS;
return -EINVAL;
}
f->rounds = f->blocks;
-
if (do_div(f->rounds, f->rsn))
f->rounds++;
@@ -744,7 +730,7 @@ int verity_fec_ctr(struct dm_verity *v)
* data_blocks and hash_blocks combined.
*/
if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) {
- v->ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
+ ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS;
return -EINVAL;
}
@@ -753,89 +739,80 @@ int verity_fec_ctr(struct dm_verity *v)
* it to be large enough.
*/
f->hash_blocks = f->blocks - v->data_blocks;
-
if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) {
- v->ti->error = "Hash device is too small for "
- DM_VERITY_OPT_FEC_BLOCKS;
+ ti->error = "Hash device is too small for "
+ DM_VERITY_OPT_FEC_BLOCKS;
return -E2BIG;
}
f->bufio = dm_bufio_client_create(f->dev->bdev,
- 1 << v->data_dev_block_bits,
- 1, 0, NULL, NULL);
-
+ 1 << v->data_dev_block_bits,
+ 1, 0, NULL, NULL);
if (IS_ERR(f->bufio)) {
- v->ti->error = "Cannot initialize dm-bufio";
+ ti->error = "Cannot initialize FEC bufio client";
return PTR_ERR(f->bufio);
}
if (dm_bufio_get_device_size(f->bufio) <
- (f->start + f->rounds * f->roots)
- >> v->data_dev_block_bits) {
- v->ti->error = "FEC device is too small";
+ ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) {
+ ti->error = "FEC device is too small";
return -E2BIG;
}
f->data_bufio = dm_bufio_client_create(v->data_dev->bdev,
- 1 << v->data_dev_block_bits,
- 1, 0, NULL, NULL);
-
+ 1 << v->data_dev_block_bits,
+ 1, 0, NULL, NULL);
if (IS_ERR(f->data_bufio)) {
- v->ti->error = "Cannot initialize dm-bufio";
+ ti->error = "Cannot initialize FEC data bufio client";
return PTR_ERR(f->data_bufio);
}
if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) {
- v->ti->error = "Data device is too small";
+ ti->error = "Data device is too small";
return -E2BIG;
}
/* Preallocate an rs_control structure for each worker thread */
f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc,
- fec_rs_free, (void *) v);
-
+ fec_rs_free, (void *) v);
if (!f->rs_pool) {
- v->ti->error = "Cannot allocate RS pool";
+ ti->error = "Cannot allocate RS pool";
return -ENOMEM;
}
f->cache = kmem_cache_create("dm_verity_fec_buffers",
- f->rsn << DM_VERITY_FEC_BUF_RS_BITS,
- 0, 0, NULL);
-
+ f->rsn << DM_VERITY_FEC_BUF_RS_BITS,
+ 0, 0, NULL);
if (!f->cache) {
- v->ti->error = "Cannot create FEC buffer cache";
+ ti->error = "Cannot create FEC buffer cache";
return -ENOMEM;
}
/* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() *
- DM_VERITY_FEC_BUF_PREALLOC,
- f->cache);
-
+ DM_VERITY_FEC_BUF_PREALLOC,
+ f->cache);
if (!f->prealloc_pool) {
- v->ti->error = "Cannot allocate FEC buffer prealloc pool";
+ ti->error = "Cannot allocate FEC buffer prealloc pool";
return -ENOMEM;
}
f->extra_pool = mempool_create_slab_pool(0, f->cache);
-
if (!f->extra_pool) {
- v->ti->error = "Cannot allocate FEC buffer extra pool";
+ ti->error = "Cannot allocate FEC buffer extra pool";
return -ENOMEM;
}
/* Preallocate an output buffer for each thread */
f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(),
- 1 << v->data_dev_block_bits);
-
+ 1 << v->data_dev_block_bits);
if (!f->output_pool) {
- v->ti->error = "Cannot allocate FEC output pool";
+ ti->error = "Cannot allocate FEC output pool";
return -ENOMEM;
}
/* Reserve space for our per-bio data */
- v->ti->per_bio_data_size += sizeof(struct dm_verity_fec_io);
+ ti->per_bio_data_size += sizeof(struct dm_verity_fec_io);
return 0;
}
--
2.4.9 (Apple Git-60)
On Fri, Dec 04, 2015 at 04:09:35PM -0500, Mike Snitzer wrote:
> I'd really appreciate it if you could do some regression testing,
> etc on your end to verify I didn't break anything while tweaking
> things.
Sure. The changes look fine. I applied them to my tree and everything
still works as expected in my tests.
Thanks for taking the time to review the patches!
Sami
On Mon, Dec 07 2015 at 8:21am -0500,
Sami Tolvanen <[email protected]> wrote:
> On Fri, Dec 04, 2015 at 04:09:35PM -0500, Mike Snitzer wrote:
> > I'd really appreciate it if you could do some regression testing,
> > etc on your end to verify I didn't break anything while tweaking
> > things.
>
> Sure. The changes look fine. I applied them to my tree and everything
> still works as expected in my tests.
Great. Moving forward it'd be awesome if you could work to get your
verity FEC support regression tests into cryptsetup's tests. We need a
way to verify future DM and/or other kernel changes don't somehow cause
this FEC support to regress.
Also, I know you said you'd be getting Milan a veritysetup patch soon.
How are things going on that? Ideally that'll land in conjunction with
the kernel's dm-verity FEC support.
> Thanks for taking the time to review the patches!
No problem, like I said I'll be reviewing the code further but at this
point your changes seem to be on-track for upstream Linux 4.5
inclusion.
Mike
On Mon, Dec 07, 2015 at 09:58:14AM -0500, Mike Snitzer wrote:
> Great. Moving forward it'd be awesome if you could work to get your
> verity FEC support regression tests into cryptsetup's tests.
Sure. These tests would basically involve generating a valid disk
image, corrupting it up to the theoretical maximum, and verifying that
dm-verity can correct it.
> Also, I know you said you'd be getting Milan a veritysetup patch soon.
> How are things going on that?
I'm still planning to have a patch for him this week.
Sami
On 12/07/2015 05:31 PM, Sami Tolvanen wrote:
> On Mon, Dec 07, 2015 at 09:58:14AM -0500, Mike Snitzer wrote:
>> Great. Moving forward it'd be awesome if you could work to get your
>> verity FEC support regression tests into cryptsetup's tests.
>
> Sure. These tests would basically involve generating a valid disk
> image, corrupting it up to the theoretical maximum, and verifying that
> dm-verity can correct it.
Does the code handle/report even the same type of data corruption
for RS blocks? Or this is just silently ignored until
data area corruption happens?
Sorry, I did not study the code here, I am just curious :)
(IOW I mean if data are ok but recovery metadata is corrupted.)
Milan
On Mon, Dec 07 2015 at 1:07pm -0500,
Milan Broz <[email protected]> wrote:
> On 12/07/2015 05:31 PM, Sami Tolvanen wrote:
> > On Mon, Dec 07, 2015 at 09:58:14AM -0500, Mike Snitzer wrote:
> >> Great. Moving forward it'd be awesome if you could work to get your
> >> verity FEC support regression tests into cryptsetup's tests.
> >
> > Sure. These tests would basically involve generating a valid disk
> > image, corrupting it up to the theoretical maximum, and verifying that
> > dm-verity can correct it.
>
> Does the code handle/report even the same type of data corruption
> for RS blocks? Or this is just silently ignored until
> data area corruption happens?
> Sorry, I did not study the code here, I am just curious :)
>
> (IOW I mean if data are ok but recovery metadata is corrupted.)
There is no preemptive accessing/checking of the RS blocks. Once a
parity is needed for a given block, due to some corruption, the RS block
will be accessed with:
fec_decode_bufs -> fec_read_parity() -> fec_decode_rs8() -> decode_rs8()
I'm not seeing any verification of the metadata in fec_read_parity() --
so it would seem that corrupt RS blocks would result in -EBADMSG being
returned from decode_rs8() (by virtue of incorrect parity being passed
to decode_rs8).
Sami (or others) am I right?
On Mon, Dec 07, 2015 at 02:07:43PM -0500, Mike Snitzer wrote:
> I'm not seeing any verification of the metadata in fec_read_parity() --
> so it would seem that corrupt RS blocks would result in -EBADMSG being
> returned from decode_rs8() (by virtue of incorrect parity being passed
> to decode_rs8).
>
> Sami (or others) am I right?
Yes, decode_rs8 failing with -EBADMSG is one option. There are also two
other cases:
1) If the parity data is only partially corrupted, it may still be
possible to correct errors, provided that the actual data isn't
too severely corrupted.
2) If there's too much corruption for Reed-Solomon to detect, it's
also possible that decode_rs8 just returns bogus data, which we
will catch when verifying the hash again. This is why combining
error correction with integrity checking is essential.
In other words, the worst case is that we cannot correct errors for the
blocks covered by the corrupted parity data.
Sami