If a full metadata buffer is being written, don't read it first. This
prevents a read-modify-write cycle and increases performance on HDDs
considerably.
To do this, we now calculate the checksums for all sectors in the bio in one
go in integrity_metadata() and then pass the result to dm_integrity_rw_tag(),
which now checks whether the whole buffer is being overwritten.
Benchmarking with a 5400RPM HDD with bitmap mode:
integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
Without patch:
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
With patch:
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
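(Rough arithmetic for why this configuration hits the new path -- a minimal
user-space sketch, assuming 512-byte sectors, 4-byte crc32c tags and
--buffer-sectors=1 as in the commands above; illustrative only, not code from
the patch:)

#include <stdio.h>

int main(void)
{
	unsigned buffer_bytes = 1 * 512;   /* --buffer-sectors=1 */
	unsigned tag_size = 4;             /* crc32c */
	/* Each tag covers one 512-byte data sector, so one metadata buffer
	 * covers buffer_bytes / tag_size data sectors. */
	unsigned data_per_buffer = buffer_bytes / tag_size * 512;

	/* Prints 64 KiB: every dd bs=64K write overwrites exactly one whole
	 * metadata buffer, so the TAG_WRITE path can skip the read. */
	printf("data covered by one metadata buffer: %u KiB\n",
	       data_per_buffer >> 10);
	return 0;
}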
Signed-off-by: Lukas Straub <[email protected]>
---
Hello everyone,
So here is v2, which now checks whether we overwrite a whole metadata buffer
instead of the (buggy) v1 check for overwriting a whole tag area.
Performance stayed the same (with --buffer-sectors=1).
The only quirk now is that it advertises a very large optimal io size in the
standard configuration (where buffer_sectors=128). Is this a problem?
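(To put a number on it -- a minimal user-space sketch of the io_opt expression
added in dm_integrity_io_hints() below, assuming 512-byte sectors and a 4-byte
crc32c tag; the values are assumptions for illustration:)

#include <stdio.h>

int main(void)
{
	unsigned sector_shift = 9;         /* 512-byte sectors */
	unsigned log2_buffer_sectors = 7;  /* buffer_sectors = 128 (default) */
	unsigned log2_tag_size = 2;        /* 4-byte crc32c tag */

	/* Same expression as the blk_limits_io_opt() argument in the patch:
	 * bytes of data whose tags fit into one metadata buffer. */
	unsigned io_opt = 1U << sector_shift << log2_buffer_sectors
			     >> log2_tag_size << sector_shift;

	printf("advertised optimal io size: %u bytes (%u MiB)\n",
	       io_opt, io_opt >> 20);
	return 0;
}

With those assumptions the hint works out to 8 MiB.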
v2:
-fix dm_integrity_rw_tag to check whether we overwrite a whole metadata buffer
-fix the optimal io size to cover a whole metadata buffer
Regards,
Lukas Straub
drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
1 file changed, 47 insertions(+), 34 deletions(-)
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index b225b3e445fa..a6d3cf1406df 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
if (unlikely(r))
return r;
- data = dm_bufio_read(ic->bufio, *metadata_block, &b);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ /* Don't read metadata sectors from disk if we're going to overwrite them completely */
+ if (op == TAG_WRITE && *metadata_offset == 0 \
+ && total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
+ data = dm_bufio_new(ic->bufio, *metadata_block, &b);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ } else {
+ data = dm_bufio_read(ic->bufio, *metadata_block, &b);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ }
to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
dp = data + *metadata_offset;
@@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
{
struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
struct dm_integrity_c *ic = dio->ic;
+ unsigned sectors_to_process = dio->range.n_sectors;
+ sector_t sector = dio->range.logical_sector;
int r;
@@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
struct bio_vec bv;
unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
- char *checksums;
+ char *checksums, *checksums_ptr;
unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
char checksums_onstack[HASH_MAX_DIGESTSIZE];
- unsigned sectors_to_process = dio->range.n_sectors;
- sector_t sector = dio->range.logical_sector;
if (unlikely(ic->mode == 'R'))
goto skip_io;
- checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
+ checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
if (!checksums) {
checksums = checksums_onstack;
@@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
}
}
+ checksums_ptr = checksums;
__bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
unsigned pos;
- char *mem, *checksums_ptr;
-
-again:
+ char *mem;
mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
pos = 0;
- checksums_ptr = checksums;
do {
integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
- checksums_ptr += ic->tag_size;
- sectors_to_process -= ic->sectors_per_block;
+
+ if (likely(checksums != checksums_onstack)) {
+ checksums_ptr += ic->tag_size;
+ } else {
+ r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
+ ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
+ if (unlikely(r))
+ goto internal_hash_error;
+ }
+
pos += ic->sectors_per_block << SECTOR_SHIFT;
sector += ic->sectors_per_block;
- } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
+ sectors_to_process -= ic->sectors_per_block;
+ } while (pos < bv.bv_len && sectors_to_process);
kunmap_atomic(mem);
- r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
- checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
- if (unlikely(r)) {
- if (r > 0) {
- DMERR_LIMIT("Checksum failed at sector 0x%llx",
- (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
- r = -EILSEQ;
- atomic64_inc(&ic->number_of_mismatches);
- }
- if (likely(checksums != checksums_onstack))
- kfree(checksums);
- goto error;
- }
-
if (!sectors_to_process)
break;
+ }
- if (unlikely(pos < bv.bv_len)) {
- bv.bv_offset += pos;
- bv.bv_len -= pos;
- goto again;
+ if (likely(checksums != checksums_onstack)) {
+ r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
+ (dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
+ !dio->write ? TAG_CMP : TAG_WRITE);
+ if (unlikely(r)) {
+ kfree(checksums);
+ goto internal_hash_error;
}
+ kfree(checksums);
}
- if (likely(checksums != checksums_onstack))
- kfree(checksums);
} else {
struct bio_integrity_payload *bip = dio->orig_bi_integrity;
@@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
skip_io:
dec_in_flight(dio);
return;
+internal_hash_error:
+ if (r > 0) {
+ DMERR_LIMIT("Checksum failed at sector 0x%llx",
+ (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
+ r = -EILSEQ;
+ atomic64_inc(&ic->number_of_mismatches);
+ }
error:
dio->bi_status = errno_to_blk_status(r);
dec_in_flight(dio);
@@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
}
+
+ blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
}
static void calculate_journal_section_size(struct dm_integrity_c *ic)
--
2.20.1
On Thu, Feb 27 2020 at 8:26am -0500,
Lukas Straub <[email protected]> wrote:
> If a full metadata buffer is being written, don't read it first. This
> prevents a read-modify-write cycle and increases performance on HDDs
> considerably.
>
> To do this we now calculate the checksums for all sectors in the bio in one
> go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> which now checks if we overwrite the whole buffer.
>
> Benchmarking with a 5400RPM HDD with bitmap mode:
> integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
>
> Without patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
>
> With patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
>
> Signed-off-by: Lukas Straub <[email protected]>
> ---
> Hello Everyone,
> So here is v2, now checking if we overwrite a whole metadata buffer instead
> of the (buggy) check if we overwrite a whole tag area before.
> Performance stayed the same (with --buffer-sectors=1).
>
> The only quirk now is that it advertises a very big optimal io size in the
> standard configuration (where buffer_sectors=128). Is this a Problem?
>
> v2:
> -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> -fix optimal io size to check if we overwrite a whole metadata buffer
>
> Regards,
> Lukas Straub
Not sure why you didn't cc Mikulas, but I just checked with him and he
thinks:
You're only seeing a boost because you're using a 512-byte sector and a
512-byte buffer. The patch helps that case but hurts in most other cases
(due to the small buffers).
Mike
> drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> 1 file changed, 47 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index b225b3e445fa..a6d3cf1406df 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> if (unlikely(r))
> return r;
>
> - data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> - if (IS_ERR(data))
> - return PTR_ERR(data);
> + /* Don't read metadata sectors from disk if we're going to overwrite them completely */
> + if (op == TAG_WRITE && *metadata_offset == 0 \
> + && total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> + data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> + if (IS_ERR(data))
> + return PTR_ERR(data);
> + } else {
> + data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> + if (IS_ERR(data))
> + return PTR_ERR(data);
> + }
>
> to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> dp = data + *metadata_offset;
> @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> {
> struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> struct dm_integrity_c *ic = dio->ic;
> + unsigned sectors_to_process = dio->range.n_sectors;
> + sector_t sector = dio->range.logical_sector;
>
> int r;
>
> @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> struct bio_vec bv;
> unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> - char *checksums;
> + char *checksums, *checksums_ptr;
> unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> char checksums_onstack[HASH_MAX_DIGESTSIZE];
> - unsigned sectors_to_process = dio->range.n_sectors;
> - sector_t sector = dio->range.logical_sector;
>
> if (unlikely(ic->mode == 'R'))
> goto skip_io;
>
> - checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> + checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> if (!checksums) {
> checksums = checksums_onstack;
> @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> }
> }
>
> + checksums_ptr = checksums;
> __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> unsigned pos;
> - char *mem, *checksums_ptr;
> -
> -again:
> + char *mem;
> mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> pos = 0;
> - checksums_ptr = checksums;
> do {
> integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> - checksums_ptr += ic->tag_size;
> - sectors_to_process -= ic->sectors_per_block;
> +
> + if (likely(checksums != checksums_onstack)) {
> + checksums_ptr += ic->tag_size;
> + } else {
> + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> + ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> + if (unlikely(r))
> + goto internal_hash_error;
> + }
> +
> pos += ic->sectors_per_block << SECTOR_SHIFT;
> sector += ic->sectors_per_block;
> - } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> + sectors_to_process -= ic->sectors_per_block;
> + } while (pos < bv.bv_len && sectors_to_process);
> kunmap_atomic(mem);
>
> - r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> - checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> - if (unlikely(r)) {
> - if (r > 0) {
> - DMERR_LIMIT("Checksum failed at sector 0x%llx",
> - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> - r = -EILSEQ;
> - atomic64_inc(&ic->number_of_mismatches);
> - }
> - if (likely(checksums != checksums_onstack))
> - kfree(checksums);
> - goto error;
> - }
> -
> if (!sectors_to_process)
> break;
> + }
>
> - if (unlikely(pos < bv.bv_len)) {
> - bv.bv_offset += pos;
> - bv.bv_len -= pos;
> - goto again;
> + if (likely(checksums != checksums_onstack)) {
> + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> + (dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> + !dio->write ? TAG_CMP : TAG_WRITE);
> + if (unlikely(r)) {
> + kfree(checksums);
> + goto internal_hash_error;
> }
> + kfree(checksums);
> }
>
> - if (likely(checksums != checksums_onstack))
> - kfree(checksums);
> } else {
> struct bio_integrity_payload *bip = dio->orig_bi_integrity;
>
> @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> skip_io:
> dec_in_flight(dio);
> return;
> +internal_hash_error:
> + if (r > 0) {
> + DMERR_LIMIT("Checksum failed at sector 0x%llx",
> + (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> + r = -EILSEQ;
> + atomic64_inc(&ic->number_of_mismatches);
> + }
> error:
> dio->bi_status = errno_to_blk_status(r);
> dec_in_flight(dio);
> @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> }
> +
> + blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> }
>
> static void calculate_journal_section_size(struct dm_integrity_c *ic)
> --
> 2.20.1
>
On Tue, 24 Mar 2020 13:18:22 -0400
Mike Snitzer <[email protected]> wrote:
> On Thu, Feb 27 2020 at 8:26am -0500,
> Lukas Straub <[email protected]> wrote:
>
> > If a full metadata buffer is being written, don't read it first. This
> > prevents a read-modify-write cycle and increases performance on HDDs
> > considerably.
> >
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole buffer.
> >
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> >
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> >
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> >
> > Signed-off-by: Lukas Straub <[email protected]>
> > ---
> > Hello Everyone,
> > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > of the (buggy) check if we overwrite a whole tag area before.
> > Performance stayed the same (with --buffer-sectors=1).
> >
> > The only quirk now is that it advertises a very big optimal io size in the
> > standard configuration (where buffer_sectors=128). Is this a Problem?
> >
> > v2:
> > -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> > -fix optimal io size to check if we overwrite a whole metadata buffer
> >
> > Regards,
> > Lukas Straub
>
>
> Not sure why you didn't cc Mikulas but I just checked with him and he
> thinks:
>
> You're only seeing a boost because you're using 512-byte sector and
> 512-byte buffer. Patch helps that case but hurts in most other cases
> (due to small buffers).
Hmm, buffer-sectors is still user-configurable. If the user wants fast
write performance on slow HDDs, they can set a small buffer-sectors value
and benefit from this patch. With the default buffer-sectors=128 it
shouldn't have any impact.
Regards,
Lukas Straub
> Mike
>
>
> > drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> > 1 file changed, 47 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..a6d3cf1406df 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> > if (unlikely(r))
> > return r;
> >
> > - data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > - if (IS_ERR(data))
> > - return PTR_ERR(data);
> > + /* Don't read metadata sectors from disk if we're going to overwrite them completely */
> > + if (op == TAG_WRITE && *metadata_offset == 0 \
> > + && total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> > + data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > + if (IS_ERR(data))
> > + return PTR_ERR(data);
> > + } else {
> > + data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > + if (IS_ERR(data))
> > + return PTR_ERR(data);
> > + }
> >
> > to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> > dp = data + *metadata_offset;
> > @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> > {
> > struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> > struct dm_integrity_c *ic = dio->ic;
> > + unsigned sectors_to_process = dio->range.n_sectors;
> > + sector_t sector = dio->range.logical_sector;
> >
> > int r;
> >
> > @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> > struct bio_vec bv;
> > unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> > struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> > - char *checksums;
> > + char *checksums, *checksums_ptr;
> > unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> > char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > - unsigned sectors_to_process = dio->range.n_sectors;
> > - sector_t sector = dio->range.logical_sector;
> >
> > if (unlikely(ic->mode == 'R'))
> > goto skip_io;
> >
> > - checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > + checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> > if (!checksums) {
> > checksums = checksums_onstack;
> > @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> > }
> > }
> >
> > + checksums_ptr = checksums;
> > __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> > unsigned pos;
> > - char *mem, *checksums_ptr;
> > -
> > -again:
> > + char *mem;
> > mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> > pos = 0;
> > - checksums_ptr = checksums;
> > do {
> > integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> > - checksums_ptr += ic->tag_size;
> > - sectors_to_process -= ic->sectors_per_block;
> > +
> > + if (likely(checksums != checksums_onstack)) {
> > + checksums_ptr += ic->tag_size;
> > + } else {
> > + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > + ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> > + if (unlikely(r))
> > + goto internal_hash_error;
> > + }
> > +
> > pos += ic->sectors_per_block << SECTOR_SHIFT;
> > sector += ic->sectors_per_block;
> > - } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> > + sectors_to_process -= ic->sectors_per_block;
> > + } while (pos < bv.bv_len && sectors_to_process);
> > kunmap_atomic(mem);
> >
> > - r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > - checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> > - if (unlikely(r)) {
> > - if (r > 0) {
> > - DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > - r = -EILSEQ;
> > - atomic64_inc(&ic->number_of_mismatches);
> > - }
> > - if (likely(checksums != checksums_onstack))
> > - kfree(checksums);
> > - goto error;
> > - }
> > -
> > if (!sectors_to_process)
> > break;
> > + }
> >
> > - if (unlikely(pos < bv.bv_len)) {
> > - bv.bv_offset += pos;
> > - bv.bv_len -= pos;
> > - goto again;
> > + if (likely(checksums != checksums_onstack)) {
> > + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > + (dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> > + !dio->write ? TAG_CMP : TAG_WRITE);
> > + if (unlikely(r)) {
> > + kfree(checksums);
> > + goto internal_hash_error;
> > }
> > + kfree(checksums);
> > }
> >
> > - if (likely(checksums != checksums_onstack))
> > - kfree(checksums);
> > } else {
> > struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> >
> > @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> > skip_io:
> > dec_in_flight(dio);
> > return;
> > +internal_hash_error:
> > + if (r > 0) {
> > + DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > + (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > + r = -EILSEQ;
> > + atomic64_inc(&ic->number_of_mismatches);
> > + }
> > error:
> > dio->bi_status = errno_to_blk_status(r);
> > dec_in_flight(dio);
> > @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> > limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> > blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> > }
> > +
> > + blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> > }
> >
> > static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> >
>
On Tue, Mar 24 2020 at 2:59pm -0400,
Lukas Straub <[email protected]> wrote:
> On Tue, 24 Mar 2020 13:18:22 -0400
> Mike Snitzer <[email protected]> wrote:
>
> > On Thu, Feb 27 2020 at 8:26am -0500,
> > Lukas Straub <[email protected]> wrote:
> >
> > > If a full metadata buffer is being written, don't read it first. This
> > > prevents a read-modify-write cycle and increases performance on HDDs
> > > considerably.
> > >
> > > To do this we now calculate the checksums for all sectors in the bio in one
> > > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > > which now checks if we overwrite the whole buffer.
> > >
> > > Benchmarking with a 5400RPM HDD with bitmap mode:
> > > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > >
> > > Without patch:
> > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> > >
> > > With patch:
> > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> > >
> > > Signed-off-by: Lukas Straub <[email protected]>
> > > ---
> > > Hello Everyone,
> > > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > > of the (buggy) check if we overwrite a whole tag area before.
> > > Performance stayed the same (with --buffer-sectors=1).
> > >
> > > The only quirk now is that it advertises a very big optimal io size in the
> > > standard configuration (where buffer_sectors=128). Is this a Problem?
> > >
> > > v2:
> > > -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> > > -fix optimal io size to check if we overwrite a whole metadata buffer
> > >
> > > Regards,
> > > Lukas Straub
> >
> >
> > Not sure why you didn't cc Mikulas but I just checked with him and he
> > thinks:
> >
> > You're only seeing a boost because you're using 512-byte sector and
> > 512-byte buffer. Patch helps that case but hurts in most other cases
> > (due to small buffers).
>
> Hmm, buffer-sectors is still user configurable. If the user wants fast
> write performance on slow HDDs he can set a small buffer-sector and
> benefit from this patch. With the default buffer-sectors=128 it
> shouldn't have any impact.
OK, if you'd be willing to conduct tests to prove there is no impact
with larger buffers, that'd certainly help reinforce your position and
make it easier for me to take your patch.
But if you're just testing against slow HDD testbeds then the test
coverage isn't wide enough.
Thanks,
Mike
On Tue, 24 Mar 2020 15:11:49 -0400
Mike Snitzer <[email protected]> wrote:
> On Tue, Mar 24 2020 at 2:59pm -0400,
> Lukas Straub <[email protected]> wrote:
>
> > On Tue, 24 Mar 2020 13:18:22 -0400
> > Mike Snitzer <[email protected]> wrote:
> >
> > > On Thu, Feb 27 2020 at 8:26am -0500,
> > > Lukas Straub <[email protected]> wrote:
> > >
> > > > If a full metadata buffer is being written, don't read it first. This
> > > > prevents a read-modify-write cycle and increases performance on HDDs
> > > > considerably.
> > > >
> > > > To do this we now calculate the checksums for all sectors in the bio in one
> > > > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > > > which now checks if we overwrite the whole buffer.
> > > >
> > > > Benchmarking with a 5400RPM HDD with bitmap mode:
> > > > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > > > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > > > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> > > >
> > > > Without patch:
> > > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
> > > >
> > > > With patch:
> > > > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
> > > >
> > > > Signed-off-by: Lukas Straub <[email protected]>
> > > > ---
> > > > Hello Everyone,
> > > > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > > > of the (buggy) check if we overwrite a whole tag area before.
> > > > Performance stayed the same (with --buffer-sectors=1).
> > > >
> > > > The only quirk now is that it advertises a very big optimal io size in the
> > > > standard configuration (where buffer_sectors=128). Is this a Problem?
> > > >
> > > > v2:
> > > > -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> > > > -fix optimal io size to check if we overwrite a whole metadata buffer
> > > >
> > > > Regards,
> > > > Lukas Straub
> > >
> > >
> > > Not sure why you didn't cc Mikulas but I just checked with him and he
> > > thinks:
> > >
> > > You're only seeing a boost because you're using 512-byte sector and
> > > 512-byte buffer. Patch helps that case but hurts in most other cases
> > > (due to small buffers).
> >
> > Hmm, buffer-sectors is still user configurable. If the user wants fast
> > write performance on slow HDDs he can set a small buffer-sector and
> > benefit from this patch. With the default buffer-sectors=128 it
> > shouldn't have any impact.
>
> OK, if you'd be willing to conduct tests to prove there is no impact
> with larger buffers that'd certainly help reinforce your position and
> make it easier for me to take your patch.
>
> But if you're just testing against slow HDD testbeds then the test
> coverage isn't wide enough.
>
> Thanks,
> Mike
>
Sure,
This time testing with a Samsung 850 EVO SSD:
without patch:
root@teststore:/home/lukas# integritysetup format --no-wipe --batch-mode -I crc32c -B /dev/sdc1
root@teststore:/home/lukas# integritysetup open -I crc32c -B /dev/sdc1 ssda_integ
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
4177264640 bytes (4.2 GB, 3.9 GiB) copied, 28 s, 149 MB/s
65536+0 records in
65536+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 28.8946 s, 149 MB/s
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=4M count=$((256*4)) conv=fsync oflag=direct status=progress
4211081216 bytes (4.2 GB, 3.9 GiB) copied, 22 s, 191 MB/s
1024+0 records in
1024+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 22.6355 s, 190 MB/s
root@teststore:/home/lukas# time mkfs.ext4 /dev/mapper/ssda_integ 4g
mke2fs 1.45.5 (07-Jan-2020)
Creating filesystem with 1048576 4k blocks and 262144 inodes
Filesystem UUID: 679c4808-d549-4576-a8ef-4c46c78c6070
Superblock backups stored on blocks:
32768, 98304, 163840, 229376, 294912, 819200, 884736
Allocating group tables: done
Writing inode tables: done
Creating journal (16384 blocks): done
Writing superblocks and filesystem accounting information: done
real 0m0.318s
user 0m0.016s
sys 0m0.001s
root@teststore:/home/lukas# mount /dev/mapper/ssda_integ /mnt/
root@teststore:/home/lukas# cd /mnt/
root@teststore:/mnt# time tar -xJf /home/lukas/linux-5.5.4.tar.xz
real 0m18.708s
user 0m14.351s
sys 0m8.585s
root@teststore:/mnt# time rm -rf linux-5.5.4/
real 0m3.226s
user 0m0.117s
sys 0m2.958s
with patch:
root@teststore:/home/lukas# integritysetup format --no-wipe --batch-mode -I crc32c -B /dev/sdc1
root@teststore:/home/lukas# integritysetup open -I crc32c -B /dev/sdc1 ssda_integ
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
4169007104 bytes (4.2 GB, 3.9 GiB) copied, 27 s, 154 MB/s
65536+0 records in
65536+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 27.9165 s, 154 MB/s
root@teststore:/home/lukas# dd if=/dev/zero of=/dev/mapper/ssda_integ bs=4M count=$((256*4)) conv=fsync oflag=direct status=progress
4169138176 bytes (4.2 GB, 3.9 GiB) copied, 22 s, 189 MB/s
1024+0 records in
1024+0 records out
4294967296 bytes (4.3 GB, 4.0 GiB) copied, 22.9181 s, 187 MB/s
root@teststore:/home/lukas# time mkfs.ext4 /dev/mapper/ssda_integ 4g
mke2fs 1.45.5 (07-Jan-2020)
Creating filesystem with 1048576 4k blocks and 262144 inodes
Filesystem UUID: 9a9decad-19e2-46a4-8cc5-ce27238829f2
Superblock backups stored on blocks:
32768, 98304, 163840, 229376, 294912, 819200, 884736
Allocating group tables: done
Writing inode tables: done
Creating journal (16384 blocks): done
Writing superblocks and filesystem accounting information: done
real 0m0.341s
user 0m0.000s
sys 0m0.016s
root@teststore:/home/lukas# mount /dev/mapper/ssda_integ /mnt/
root@teststore:/home/lukas# cd /mnt/
root@teststore:/mnt# time tar -xJf /home/lukas/linux-5.5.4.tar.xz
real 0m18.409s
user 0m14.191s
sys 0m8.585s
root@teststore:/mnt# time rm -rf linux-5.5.4/
real 0m3.200s
user 0m0.133s
sys 0m2.979s
Regards,
Lukas Straub
Hi
I tested it on my rotational disk:
On Thu, 27 Feb 2020, Lukas Straub wrote:
> If a full metadata buffer is being written, don't read it first. This
> prevents a read-modify-write cycle and increases performance on HDDs
> considerably.
>
> To do this we now calculate the checksums for all sectors in the bio in one
> go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> which now checks if we overwrite the whole buffer.
>
> Benchmarking with a 5400RPM HDD with bitmap mode:
> integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
>
> Without patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
I get 42.3 MB/s. (It seems that my disk has better prefetch than yours.)
> With patch:
> 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
I get 110 MB/s.
> Signed-off-by: Lukas Straub <[email protected]>
BUT: when I removed "--buffer-sectors=1" from the command line, I got
these numbers:
without the patch: 101 MB/s
with the patch: 101 MB/s
So, you crippled performance with "--buffer-sectors=1" and then made a
patch to restore it.
The patch only helps 10%.
Mikulas
> ---
> Hello Everyone,
> So here is v2, now checking if we overwrite a whole metadata buffer instead
> of the (buggy) check if we overwrite a whole tag area before.
> Performance stayed the same (with --buffer-sectors=1).
>
> The only quirk now is that it advertises a very big optimal io size in the
> standard configuration (where buffer_sectors=128). Is this a Problem?
>
> v2:
> -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> -fix optimal io size to check if we overwrite a whole metadata buffer
>
> Regards,
> Lukas Straub
>
> drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> 1 file changed, 47 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> index b225b3e445fa..a6d3cf1406df 100644
> --- a/drivers/md/dm-integrity.c
> +++ b/drivers/md/dm-integrity.c
> @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> if (unlikely(r))
> return r;
>
> - data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> - if (IS_ERR(data))
> - return PTR_ERR(data);
> + /* Don't read metadata sectors from disk if we're going to overwrite them completely */
> + if (op == TAG_WRITE && *metadata_offset == 0 \
> + && total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> + data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> + if (IS_ERR(data))
> + return PTR_ERR(data);
> + } else {
> + data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> + if (IS_ERR(data))
> + return PTR_ERR(data);
> + }
>
> to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> dp = data + *metadata_offset;
> @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> {
> struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> struct dm_integrity_c *ic = dio->ic;
> + unsigned sectors_to_process = dio->range.n_sectors;
> + sector_t sector = dio->range.logical_sector;
>
> int r;
>
> @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> struct bio_vec bv;
> unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> - char *checksums;
> + char *checksums, *checksums_ptr;
> unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> char checksums_onstack[HASH_MAX_DIGESTSIZE];
> - unsigned sectors_to_process = dio->range.n_sectors;
> - sector_t sector = dio->range.logical_sector;
>
> if (unlikely(ic->mode == 'R'))
> goto skip_io;
>
> - checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> + checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> if (!checksums) {
> checksums = checksums_onstack;
> @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> }
> }
>
> + checksums_ptr = checksums;
> __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> unsigned pos;
> - char *mem, *checksums_ptr;
> -
> -again:
> + char *mem;
> mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> pos = 0;
> - checksums_ptr = checksums;
> do {
> integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> - checksums_ptr += ic->tag_size;
> - sectors_to_process -= ic->sectors_per_block;
> +
> + if (likely(checksums != checksums_onstack)) {
> + checksums_ptr += ic->tag_size;
> + } else {
> + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> + ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> + if (unlikely(r))
> + goto internal_hash_error;
> + }
> +
> pos += ic->sectors_per_block << SECTOR_SHIFT;
> sector += ic->sectors_per_block;
> - } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> + sectors_to_process -= ic->sectors_per_block;
> + } while (pos < bv.bv_len && sectors_to_process);
> kunmap_atomic(mem);
>
> - r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> - checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> - if (unlikely(r)) {
> - if (r > 0) {
> - DMERR_LIMIT("Checksum failed at sector 0x%llx",
> - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> - r = -EILSEQ;
> - atomic64_inc(&ic->number_of_mismatches);
> - }
> - if (likely(checksums != checksums_onstack))
> - kfree(checksums);
> - goto error;
> - }
> -
> if (!sectors_to_process)
> break;
> + }
>
> - if (unlikely(pos < bv.bv_len)) {
> - bv.bv_offset += pos;
> - bv.bv_len -= pos;
> - goto again;
> + if (likely(checksums != checksums_onstack)) {
> + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> + (dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> + !dio->write ? TAG_CMP : TAG_WRITE);
> + if (unlikely(r)) {
> + kfree(checksums);
> + goto internal_hash_error;
> }
> + kfree(checksums);
> }
>
> - if (likely(checksums != checksums_onstack))
> - kfree(checksums);
> } else {
> struct bio_integrity_payload *bip = dio->orig_bi_integrity;
>
> @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> skip_io:
> dec_in_flight(dio);
> return;
> +internal_hash_error:
> + if (r > 0) {
> + DMERR_LIMIT("Checksum failed at sector 0x%llx",
> + (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> + r = -EILSEQ;
> + atomic64_inc(&ic->number_of_mismatches);
> + }
> error:
> dio->bi_status = errno_to_blk_status(r);
> dec_in_flight(dio);
> @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> }
> +
> + blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> }
>
> static void calculate_journal_section_size(struct dm_integrity_c *ic)
> --
> 2.20.1
>
>
> --
> dm-devel mailing list
> [email protected]
> https://www.redhat.com/mailman/listinfo/dm-devel
>
On Mon, 30 Mar 2020 12:34:04 -0400 (EDT)
Mikulas Patocka <[email protected]> wrote:
> Hi
>
> I tested it on my rotational disk:
>
>
> On Thu, 27 Feb 2020, Lukas Straub wrote:
>
> > If a full metadata buffer is being written, don't read it first. This
> > prevents a read-modify-write cycle and increases performance on HDDs
> > considerably.
> >
> > To do this we now calculate the checksums for all sectors in the bio in one
> > go in integrity_metadata and then pass the result to dm_integrity_rw_tag,
> > which now checks if we overwrite the whole buffer.
> >
> > Benchmarking with a 5400RPM HDD with bitmap mode:
> > integritysetup format --no-wipe --batch-mode --interleave-sectors $((64*1024)) -t 4 -s 512 -I crc32c -B /dev/sdc
> > integritysetup open --buffer-sectors=1 -I crc32c -B /dev/sdc hdda_integ
> > dd if=/dev/zero of=/dev/mapper/hdda_integ bs=64K count=$((16*1024*4)) conv=fsync oflag=direct status=progress
> >
> > Without patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 400.326 s, 10.7 MB/s
>
> I get 42.3 MB/s. (it seems that my disk has better prefetch than yours).
>
> > With patch:
> > 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 41.2057 s, 104 MB/s
>
> I get 110 MB/s.
>
> > Signed-off-by: Lukas Straub <[email protected]>
>
> BUT: when I removed "--buffer-sectors=1" from the command line, I've got
> these numbers:
> without the patch: 101 MB/s
I get 86.5 MB/s.
So in my case it's worth it, and as you saw, it doesn't (negatively) affect other cases.
Regards,
Lukas Straub
> with the patch: 101 MB/s
>
> So, you crippled performance with "--buffer-sectors=1" and then made a
> patch to restore it.
>
> The patch only helps 10%.
>
> Mikulas
>
>
> > ---
> > Hello Everyone,
> > So here is v2, now checking if we overwrite a whole metadata buffer instead
> > of the (buggy) check if we overwrite a whole tag area before.
> > Performance stayed the same (with --buffer-sectors=1).
> >
> > The only quirk now is that it advertises a very big optimal io size in the
> > standard configuration (where buffer_sectors=128). Is this a Problem?
> >
> > v2:
> > -fix dm_integrity_rw_tag to check if we overwrite a whole metadat buffer
> > -fix optimal io size to check if we overwrite a whole metadata buffer
> >
> > Regards,
> > Lukas Straub
> >
> > drivers/md/dm-integrity.c | 81 +++++++++++++++++++++++----------------
> > 1 file changed, 47 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
> > index b225b3e445fa..a6d3cf1406df 100644
> > --- a/drivers/md/dm-integrity.c
> > +++ b/drivers/md/dm-integrity.c
> > @@ -1309,9 +1309,17 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
> > if (unlikely(r))
> > return r;
> >
> > - data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > - if (IS_ERR(data))
> > - return PTR_ERR(data);
> > + /* Don't read metadata sectors from disk if we're going to overwrite them completely */
> > + if (op == TAG_WRITE && *metadata_offset == 0 \
> > + && total_size >= (1U << SECTOR_SHIFT << ic->log2_buffer_sectors)) {
> > + data = dm_bufio_new(ic->bufio, *metadata_block, &b);
> > + if (IS_ERR(data))
> > + return PTR_ERR(data);
> > + } else {
> > + data = dm_bufio_read(ic->bufio, *metadata_block, &b);
> > + if (IS_ERR(data))
> > + return PTR_ERR(data);
> > + }
> >
> > to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
> > dp = data + *metadata_offset;
> > @@ -1514,6 +1522,8 @@ static void integrity_metadata(struct work_struct *w)
> > {
> > struct dm_integrity_io *dio = container_of(w, struct dm_integrity_io, work);
> > struct dm_integrity_c *ic = dio->ic;
> > + unsigned sectors_to_process = dio->range.n_sectors;
> > + sector_t sector = dio->range.logical_sector;
> >
> > int r;
> >
> > @@ -1522,16 +1532,14 @@ static void integrity_metadata(struct work_struct *w)
> > struct bio_vec bv;
> > unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
> > struct bio *bio = dm_bio_from_per_bio_data(dio, sizeof(struct dm_integrity_io));
> > - char *checksums;
> > + char *checksums, *checksums_ptr;
> > unsigned extra_space = unlikely(digest_size > ic->tag_size) ? digest_size - ic->tag_size : 0;
> > char checksums_onstack[HASH_MAX_DIGESTSIZE];
> > - unsigned sectors_to_process = dio->range.n_sectors;
> > - sector_t sector = dio->range.logical_sector;
> >
> > if (unlikely(ic->mode == 'R'))
> > goto skip_io;
> >
> > - checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > + checksums = kmalloc((dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
> > GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
> > if (!checksums) {
> > checksums = checksums_onstack;
> > @@ -1542,49 +1550,45 @@ static void integrity_metadata(struct work_struct *w)
> > }
> > }
> >
> > + checksums_ptr = checksums;
> > __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) {
> > unsigned pos;
> > - char *mem, *checksums_ptr;
> > -
> > -again:
> > + char *mem;
> > mem = (char *)kmap_atomic(bv.bv_page) + bv.bv_offset;
> > pos = 0;
> > - checksums_ptr = checksums;
> > do {
> > integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
> > - checksums_ptr += ic->tag_size;
> > - sectors_to_process -= ic->sectors_per_block;
> > +
> > + if (likely(checksums != checksums_onstack)) {
> > + checksums_ptr += ic->tag_size;
> > + } else {
> > + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > + ic->tag_size, !dio->write ? TAG_CMP : TAG_WRITE);
> > + if (unlikely(r))
> > + goto internal_hash_error;
> > + }
> > +
> > pos += ic->sectors_per_block << SECTOR_SHIFT;
> > sector += ic->sectors_per_block;
> > - } while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
> > + sectors_to_process -= ic->sectors_per_block;
> > + } while (pos < bv.bv_len && sectors_to_process);
> > kunmap_atomic(mem);
> >
> > - r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > - checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
> > - if (unlikely(r)) {
> > - if (r > 0) {
> > - DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > - (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > - r = -EILSEQ;
> > - atomic64_inc(&ic->number_of_mismatches);
> > - }
> > - if (likely(checksums != checksums_onstack))
> > - kfree(checksums);
> > - goto error;
> > - }
> > -
> > if (!sectors_to_process)
> > break;
> > + }
> >
> > - if (unlikely(pos < bv.bv_len)) {
> > - bv.bv_offset += pos;
> > - bv.bv_len -= pos;
> > - goto again;
> > + if (likely(checksums != checksums_onstack)) {
> > + r = dm_integrity_rw_tag(ic, checksums, &dio->metadata_block, &dio->metadata_offset,
> > + (dio->range.n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size,
> > + !dio->write ? TAG_CMP : TAG_WRITE);
> > + if (unlikely(r)) {
> > + kfree(checksums);
> > + goto internal_hash_error;
> > }
> > + kfree(checksums);
> > }
> >
> > - if (likely(checksums != checksums_onstack))
> > - kfree(checksums);
> > } else {
> > struct bio_integrity_payload *bip = dio->orig_bi_integrity;
> >
> > @@ -1615,6 +1619,13 @@ static void integrity_metadata(struct work_struct *w)
> > skip_io:
> > dec_in_flight(dio);
> > return;
> > +internal_hash_error:
> > + if (r > 0) {
> > + DMERR_LIMIT("Checksum failed at sector 0x%llx",
> > + (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
> > + r = -EILSEQ;
> > + atomic64_inc(&ic->number_of_mismatches);
> > + }
> > error:
> > dio->bi_status = errno_to_blk_status(r);
> > dec_in_flight(dio);
> > @@ -3019,6 +3030,8 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
> > limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
> > blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
> > }
> > +
> > + blk_limits_io_opt(limits, 1U << SECTOR_SHIFT << ic->log2_buffer_sectors >> ic->log2_tag_size << SECTOR_SHIFT );
> > }
> >
> > static void calculate_journal_section_size(struct dm_integrity_c *ic)
> > --
> > 2.20.1
> >
> >
> > --
> > dm-devel mailing list
> > [email protected]
> > https://www.redhat.com/mailman/listinfo/dm-devel
> >
>