2022-02-02 10:19:10

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 6/7] block: add pi for nvme enhanced integrity

The NVMe specification defines larger data integrity formats beyond the
t10 tuple. Add support for the specification defined CRC64 formats,
assuming the reference tag does not need to be split with the "storage
tag".

Cc: "Martin K. Petersen" <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
block/Kconfig | 1 +
block/t10-pi.c | 194 +++++++++++++++++++++++++++++++++++++++++
include/linux/t10-pi.h | 20 +++++
3 files changed, 215 insertions(+)

diff --git a/block/Kconfig b/block/Kconfig
index 205f8d01c695..e3ce9196ad07 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -75,6 +75,7 @@ config BLK_DEV_INTEGRITY_T10
tristate
depends on BLK_DEV_INTEGRITY
select CRC_T10DIF
+ select CRC64

config BLK_DEV_ZONED
bool "Zoned block device support"
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 758a76518854..7bfefe970bc5 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -7,8 +7,10 @@
#include <linux/t10-pi.h>
#include <linux/blk-integrity.h>
#include <linux/crc-t10dif.h>
+#include <linux/crc64.h>
#include <linux/module.h>
#include <net/checksum.h>
+#include <asm/unaligned.h>

typedef __be16 (csum_fn) (void *, unsigned int);

@@ -278,4 +280,196 @@ const struct blk_integrity_profile t10_pi_type3_ip = {
};
EXPORT_SYMBOL(t10_pi_type3_ip);

+/*
+ * Compute the guard tag for one protection interval.
+ *
+ * Runs crc64_rocksoft() over @len bytes at @data, passing ~0ULL as its
+ * first argument, and returns the result converted to big-endian.
+ */
+static __be64 nvme_pi_crc64(void *data, unsigned int len)
+{
+	u64 crc = crc64_rocksoft(~0ULL, data, len);
+
+	return cpu_to_be64(crc);
+}
+
+/*
+ * Fill in one nvme_crc64_pi_tuple for every protection interval covered by
+ * @iter.
+ *
+ * Each tuple receives the CRC64 guard of its data interval and an
+ * application tag of zero.  For T10_PI_TYPE1_PROTECTION the reference tag
+ * is written from iter->seed; for any other @type it is written as zero.
+ * iter->data_buf, iter->prot_buf and iter->seed are advanced once per
+ * interval as a side effect.
+ *
+ * Always returns BLK_STS_OK.
+ */
+static blk_status_t nvme_crc64_generate(struct blk_integrity_iter *iter,
+					enum t10_dif_type type)
+{
+	const bool seed_ref = type == T10_PI_TYPE1_PROTECTION;
+	unsigned int done;
+
+	for (done = 0; done < iter->data_size; done += iter->interval) {
+		struct nvme_crc64_pi_tuple *tuple = iter->prot_buf;
+		u64 ref = seed_ref ? iter->seed : 0ULL;
+
+		tuple->guard_tag = nvme_pi_crc64(iter->data_buf,
+						 iter->interval);
+		tuple->app_tag = 0;
+		put_unaligned_be48(ref, tuple->ref_tag);
+
+		/* Step to the next data interval and its tuple. */
+		iter->data_buf += iter->interval;
+		iter->prot_buf += iter->tuple_size;
+		iter->seed++;
+	}
+
+	return BLK_STS_OK;
+}
+
+/*
+ * Return true when all six bytes of the reference tag at @ref_tag are 0xff.
+ */
+static bool nvme_crc64_ref_escape(u8 *ref_tag)
+{
+	/* Never written, so keep the pattern const (read-only data). */
+	static const u8 ref_escape[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+	return memcmp(ref_tag, ref_escape, sizeof(ref_escape)) == 0;
+}
+
+/*
+ * Check the nvme_crc64_pi_tuple of every protection interval covered by
+ * @iter.
+ *
+ * T10_PI_TYPE1_PROTECTION: a tuple whose app tag equals T10_PI_APP_ESCAPE
+ * is skipped entirely; otherwise its 48-bit reference tag must equal the
+ * low 48 bits of iter->seed.
+ * T10_PI_TYPE3_PROTECTION: a tuple is skipped only when the app tag equals
+ * T10_PI_APP_ESCAPE and the reference tag is all 0xff.
+ * Every tuple that is not skipped must carry a guard tag matching the
+ * CRC64 of its data interval.
+ *
+ * iter->data_buf, iter->prot_buf and iter->seed are advanced once per
+ * interval, including skipped ones.
+ *
+ * Returns BLK_STS_PROTECTION on the first mismatch (after logging it),
+ * BLK_STS_OK otherwise.
+ */
+static blk_status_t nvme_crc64_verify(struct blk_integrity_iter *iter,
+				      enum t10_dif_type type)
+{
+	unsigned int done;
+
+	for (done = 0; done < iter->data_size; done += iter->interval) {
+		struct nvme_crc64_pi_tuple *tuple = iter->prot_buf;
+		bool skip_guard = false;
+
+		switch (type) {
+		case T10_PI_TYPE1_PROTECTION:
+			if (tuple->app_tag == T10_PI_APP_ESCAPE) {
+				skip_guard = true;
+			} else {
+				u64 ref = get_unaligned_be48(tuple->ref_tag);
+				u64 seed = iter->seed & 0xffffffffffffull;
+
+				if (ref != seed) {
+					pr_err("%s: ref tag error at location %llu (rcvd %llu)\n",
+					       iter->disk_name, seed, ref);
+					return BLK_STS_PROTECTION;
+				}
+			}
+			break;
+		case T10_PI_TYPE3_PROTECTION:
+			skip_guard = tuple->app_tag == T10_PI_APP_ESCAPE &&
+				     nvme_crc64_ref_escape(tuple->ref_tag);
+			break;
+		default:
+			/* Other types: guard check only. */
+			break;
+		}
+
+		if (!skip_guard) {
+			__be64 csum = nvme_pi_crc64(iter->data_buf,
+						    iter->interval);
+
+			if (tuple->guard_tag != csum) {
+				pr_err("%s: guard tag error at sector %llu "
+				       "(rcvd %016llx, want %016llx)\n",
+				       iter->disk_name,
+				       (unsigned long long)iter->seed,
+				       be64_to_cpu(tuple->guard_tag),
+				       be64_to_cpu(csum));
+				return BLK_STS_PROTECTION;
+			}
+		}
+
+		/* Step to the next data interval and its tuple. */
+		iter->data_buf += iter->interval;
+		iter->prot_buf += iter->tuple_size;
+		iter->seed++;
+	}
+
+	return BLK_STS_OK;
+}
+
+/* Run the common CRC64 verifier with Type 1 reference tag checking. */
+static blk_status_t nvme_pi_type1_verify_crc(struct blk_integrity_iter *iter)
+{
+	const enum t10_dif_type type = T10_PI_TYPE1_PROTECTION;
+
+	return nvme_crc64_verify(iter, type);
+}
+
+/* Run the common CRC64 generator, seeding reference tags from iter->seed. */
+static blk_status_t nvme_pi_type1_generate_crc(struct blk_integrity_iter *iter)
+{
+	const enum t10_dif_type type = T10_PI_TYPE1_PROTECTION;
+
+	return nvme_crc64_generate(iter, type);
+}
+
+/*
+ * Remap the reference tags of @rq's integrity tuples.
+ *
+ * Two counters advance once per tuple: "virt", which starts at the low 48
+ * bits of each bio's integrity seed, and "ref_tag", which starts at
+ * nvme_pi_extended_ref_tag(rq) and runs across the whole request.  A tuple
+ * whose stored reference tag equals virt is rewritten to ref_tag; any other
+ * tuple is left untouched.
+ *
+ * Each processed bio is flagged BIP_MAPPED_INTEGRITY; the walk stops at the
+ * first bio that already carries the flag.
+ */
+static void nvme_pi_type1_prepare(struct request *rq)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u64 ref_tag = nvme_pi_extended_ref_tag(rq);
+	struct bio *bio;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u64 virt = bip_get_seed(bip) & 0xffffffffffffull;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		/* Already remapped? */
+		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
+			break;
+
+		bip_for_each_vec(iv, bip, iter) {
+			unsigned int j;
+			void *p;
+
+			p = bvec_kmap_local(&iv);
+			for (j = 0; j < iv.bv_len; j += tuple_sz) {
+				/*
+				 * Index from the mapping base rather than
+				 * advancing p, so that kunmap_local() below
+				 * gets an address inside the mapped page even
+				 * when the segment ends on a page boundary.
+				 */
+				struct nvme_crc64_pi_tuple *pi = p + j;
+				u64 ref = get_unaligned_be48(pi->ref_tag);
+
+				if (ref == virt)
+					put_unaligned_be48(ref_tag, pi->ref_tag);
+				virt++;
+				ref_tag++;
+			}
+			kunmap_local(p);
+		}
+
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+	}
+}
+
+/*
+ * Undo the reference tag remapping for the completed part of @rq.
+ *
+ * @nr_bytes is converted to a number of protection intervals using the
+ * queue's interval_exp; at most that many tuples are visited.  Two counters
+ * advance once per tuple: "virt", which starts at the low 48 bits of each
+ * bio's integrity seed, and "ref_tag", which starts at
+ * nvme_pi_extended_ref_tag(rq) and runs across the whole request.  A tuple
+ * whose stored reference tag equals ref_tag is rewritten to virt; any other
+ * tuple is left untouched.
+ */
+static void nvme_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
+{
+	unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u64 ref_tag = nvme_pi_extended_ref_tag(rq);
+	struct bio *bio;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u64 virt = bip_get_seed(bip) & 0xffffffffffffull;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		bip_for_each_vec(iv, bip, iter) {
+			unsigned int j;
+			void *p;
+
+			p = bvec_kmap_local(&iv);
+			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
+				/*
+				 * Index from the mapping base rather than
+				 * advancing p, so that kunmap_local() below
+				 * gets an address inside the mapped page even
+				 * when the segment ends on a page boundary.
+				 */
+				struct nvme_crc64_pi_tuple *pi = p + j;
+				u64 ref = get_unaligned_be48(pi->ref_tag);
+
+				if (ref == ref_tag)
+					put_unaligned_be48(virt, pi->ref_tag);
+				virt++;
+				ref_tag++;
+				intervals--;
+			}
+			kunmap_local(p);
+		}
+	}
+}
+
+/* Run the common CRC64 verifier with the Type 3 escape rule. */
+static blk_status_t nvme_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+{
+	const enum t10_dif_type type = T10_PI_TYPE3_PROTECTION;
+
+	return nvme_crc64_verify(iter, type);
+}
+
+/* Run the common CRC64 generator; reference tags are written as zero. */
+static blk_status_t nvme_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+{
+	const enum t10_dif_type type = T10_PI_TYPE3_PROTECTION;
+
+	return nvme_crc64_generate(iter, type);
+}
+
+/*
+ * Integrity profile for the CRC64 guard with Type 1 reference tags:
+ * generate/verify go through the nvme_crc64_* helpers, and the reference
+ * tags are remapped by the prepare/complete hooks.
+ */
+const struct blk_integrity_profile nvme_pi_type1_crc64 = {
+	.name		= "NVME-DIF-TYPE1-CRC64",
+	.generate_fn	= nvme_pi_type1_generate_crc,
+	.verify_fn	= nvme_pi_type1_verify_crc,
+	.prepare_fn	= nvme_pi_type1_prepare,
+	.complete_fn	= nvme_pi_type1_complete,
+};
+EXPORT_SYMBOL(nvme_pi_type1_crc64);
+
+/*
+ * Integrity profile for the CRC64 guard with Type 3 handling:
+ * generate/verify go through the nvme_crc64_* helpers, while the
+ * prepare/complete hooks are the existing t10_pi_type3_* ones.
+ */
+const struct blk_integrity_profile nvme_pi_type3_crc64 = {
+	.name		= "NVME-DIF-TYPE3-CRC64",
+	.generate_fn	= nvme_pi_type3_generate_crc,
+	.verify_fn	= nvme_pi_type3_verify_crc,
+	.prepare_fn	= t10_pi_type3_prepare,
+	.complete_fn	= t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(nvme_pi_type3_crc64);
+
+MODULE_LICENSE("GPL");
MODULE_LICENSE("GPL");
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c635c2e014e3..fd3a9b99500a 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -53,4 +53,24 @@ extern const struct blk_integrity_profile t10_pi_type1_ip;
extern const struct blk_integrity_profile t10_pi_type3_crc;
extern const struct blk_integrity_profile t10_pi_type3_ip;

+/*
+ * Per-interval protection information tuple used by the NVMe CRC64
+ * integrity profiles.
+ */
+struct nvme_crc64_pi_tuple {
+	__be64	guard_tag;	/* big-endian CRC64 of the data interval */
+	__be16	app_tag;	/* application tag */
+	__u8	ref_tag[6];	/* reference tag, read/written as be48 */
+};
+
+/*
+ * Return the initial 48-bit reference tag for @rq.
+ *
+ * The request's start sector is shifted down into units of the protection
+ * interval: the queue's integrity interval_exp when CONFIG_BLK_DEV_INTEGRITY
+ * is enabled and the value is non-zero, otherwise log2 of the logical block
+ * size.  The result is masked to the low 48 bits.
+ */
+static inline u64 nvme_pi_extended_ref_tag(struct request *rq)
+{
+	unsigned int interval_shift = ilog2(queue_logical_block_size(rq->q));
+	u64 interval_nr;
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	if (rq->q->integrity.interval_exp)
+		interval_shift = rq->q->integrity.interval_exp;
+#endif
+	interval_nr = blk_rq_pos(rq) >> (interval_shift - SECTOR_SHIFT);
+
+	return interval_nr & 0xffffffffffffull;
+}
+
+extern const struct blk_integrity_profile nvme_pi_type1_crc64;
+extern const struct blk_integrity_profile nvme_pi_type3_crc64;
+
#endif
--
2.25.4


2022-02-02 17:32:13

by Martin K. Petersen

[permalink] [raw]
Subject: Re: [PATCHv2 6/7] block: add pi for nvme enhanced integrity


Keith,

This all looks pretty good to me. Only nit I have is:

> +static blk_status_t nvme_pi_type1_verify_crc(struct blk_integrity_iter *iter)
> +{
> + return nvme_crc64_verify(iter, T10_PI_TYPE1_PROTECTION);
> +}
> +
> +static blk_status_t nvme_pi_type1_generate_crc(struct blk_integrity_iter *iter)
> +{
> + return nvme_crc64_generate(iter, T10_PI_TYPE1_PROTECTION);
> +}

Since we will definitely need to support the CRC32C variants, the
nvme_pi_type1_ prefix is a bit too generic. Wish we had gone with Type 4
and 5 like I originally proposed in SCSI. Not a big fan of this "almost
exactly like T10 Type 1 except for all these differences" situation that
NVMe ended up with.

Anyway. So I think the NVMe-specific format helpers need to at the very
least capture that they are for the CRC64 case.

Other than that it looks OK.

--
Martin K. Petersen Oracle Linux Engineering

2022-02-03 11:34:35

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCHv2 6/7] block: add pi for nvme enhanced integrity

On 2/1/22 11:01, Keith Busch wrote:
> + ref = get_unaligned_be48(pi->ref_tag);
> + seed = iter->seed & 0xffffffffffffull;

The "& 0xffffffffffffull" operation occurs three times in this patch.
Has it been considered to introduce a lower_48_bits() function?

Thanks,

Bart.

2022-02-04 12:21:34

by Hannes Reinecke

[permalink] [raw]
Subject: Re: [PATCHv2 6/7] block: add pi for nvme enhanced integrity

On 2/1/22 20:01, Keith Busch wrote:
> The NVMe specification defines larger data integrity formats beyond the
> t10 tuple. Add support for the specification defined CRC64 formats,
> assuming the reference tag does not need to be split with the "storage
> tag".
>
> Cc: "Martin K. Petersen" <[email protected]>
> Signed-off-by: Keith Busch <[email protected]>
> ---
> block/Kconfig | 1 +
> block/t10-pi.c | 194 +++++++++++++++++++++++++++++++++++++++++
> include/linux/t10-pi.h | 20 +++++
> 3 files changed, 215 insertions(+)
>
Reviewed-by: Hannes Reinecke <[email protected]>

Cheers,

Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
[email protected] +49 911 74053 688
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), GF: Felix Imendörffer