This patchset aims to fix several problems related to T10-PI support.
These patches can be applied on top of Sagi's "[v1] Simplify dif_verify
routines and fixup fileio protection information code" patchset.
* Changes from v2:
- Introduces crc_t10dif_update() to calculate CRC by multiple calls
- Handle odd SG mapping correctly instead of giving up
* Changes from v1:
- Reduce code duplication a bit in target_read_prot_action()
- Fix sbc_dif_verify() for WRITE_SAME command
- Fix inverted rw argument for fd_do_rw()
- Perform DIF verify before write for WRITE_SAME
Akinobu Mita (5):
target: ensure se_cmd->t_prot_sg is allocated when required
lib: introduce crc_t10dif_update()
target: handle odd SG mapping for data transfer memory
target: Fix sbc_dif_generate() and sbc_dif_verify() for WRITE SAME
target/file: enable WRITE SAME when protection info is enabled
drivers/target/target_core_file.c | 18 +++--
drivers/target/target_core_sbc.c | 136 +++++++++++++++++++++++----------
drivers/target/target_core_transport.c | 30 +++++---
include/linux/crc-t10dif.h | 1 +
include/target/target_core_base.h | 1 +
lib/crc-t10dif.c | 23 ++++++
6 files changed, 153 insertions(+), 56 deletions(-)
Cc: Tim Chen <[email protected]>
Cc: Herbert Xu <[email protected]>
Cc: "David S. Miller" <[email protected]>
Cc: [email protected]
Cc: Nicholas Bellinger <[email protected]>
Cc: Sagi Grimberg <[email protected]>
Cc: "Martin K. Petersen" <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: "James E.J. Bottomley" <[email protected]>
Cc: [email protected]
--
1.9.1
This introduces crc_t10dif_update(), which enables calculating the CRC
for a block which straddles multiple SG elements by calling it multiple
times.
Signed-off-by: Akinobu Mita <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Herbert Xu <[email protected]>
Cc: "David S. Miller" <[email protected]>
Cc: [email protected]
Cc: Nicholas Bellinger <[email protected]>
Cc: Sagi Grimberg <[email protected]>
Cc: "Martin K. Petersen" <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: "James E.J. Bottomley" <[email protected]>
Cc: [email protected]
---
* New patch from v3
include/linux/crc-t10dif.h | 1 +
lib/crc-t10dif.c | 23 +++++++++++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index cf53d07..d81961e 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -9,5 +9,6 @@
extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer,
size_t len);
extern __u16 crc_t10dif(unsigned char const *, size_t);
+extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t);
#endif
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index dfe6ec1..7cdbe2e 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -19,6 +19,29 @@
static struct crypto_shash *crct10dif_tfm;
static struct static_key crct10dif_fallback __read_mostly;
+__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
+{
+ struct {
+ struct shash_desc shash;
+ char ctx[2];
+ } desc;
+ int err;
+
+ if (static_key_false(&crct10dif_fallback))
+ return crc_t10dif_generic(crc, buffer, len);
+
+ desc.shash.tfm = crct10dif_tfm;
+ desc.shash.flags = 0;
+
+ err = crypto_shash_import(&desc.shash, &crc);
+ BUG_ON(err);
+ err = crypto_shash_update(&desc.shash, buffer, len);
+ BUG_ON(err);
+
+ return *(__u16 *)desc.ctx;
+}
+EXPORT_SYMBOL(crc_t10dif_update);
+
__u16 crc_t10dif(const unsigned char *buffer, size_t len)
{
struct {
--
1.9.1
sbc_dif_generate() and sbc_dif_verify() currently assume that each
SG element for data transfer memory doesn't straddle the block size
boundary.
However, when using SG_IO ioctl, we can choose the data transfer
memory which doesn't satisfy that alignment requirement.
In order to handle such cases correctly, this change inverts the outer
loop to iterate over the protection information and the inner loop to
iterate over the data transfer memory, and enables calculating the CRC
for a block which straddles multiple SG elements.
Signed-off-by: Akinobu Mita <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Herbert Xu <[email protected]>
Cc: "David S. Miller" <[email protected]>
Cc: [email protected]
Cc: Nicholas Bellinger <[email protected]>
Cc: Sagi Grimberg <[email protected]>
Cc: "Martin K. Petersen" <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: "James E.J. Bottomley" <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
* Changes from v2:
- Handle odd SG mapping correctly instead of giving up
drivers/target/target_core_sbc.c | 108 +++++++++++++++++++++++++--------------
1 file changed, 69 insertions(+), 39 deletions(-)
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index edba39f..33d2426 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -1182,27 +1182,43 @@ sbc_dif_generate(struct se_cmd *cmd)
{
struct se_device *dev = cmd->se_dev;
struct se_dif_v1_tuple *sdt;
- struct scatterlist *dsg, *psg = cmd->t_prot_sg;
+ struct scatterlist *dsg = cmd->t_data_sg, *psg;
sector_t sector = cmd->t_task_lba;
void *daddr, *paddr;
int i, j, offset = 0;
+ unsigned int block_size = dev->dev_attrib.block_size;
- for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) {
- daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
+ for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) {
paddr = kmap_atomic(sg_page(psg)) + psg->offset;
+ daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
- for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) {
+ for (j = 0; j < psg->length;
+ j += sizeof(struct se_dif_v1_tuple)) {
+ __u16 crc = 0;
+ unsigned int avail;
- if (offset >= psg->length) {
- kunmap_atomic(paddr);
- psg = sg_next(psg);
- paddr = kmap_atomic(sg_page(psg)) + psg->offset;
- offset = 0;
+ if (offset >= dsg->length) {
+ offset -= dsg->length;
+ kunmap_atomic(daddr);
+ dsg = sg_next(dsg);
+ daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
}
- sdt = paddr + offset;
- sdt->guard_tag = cpu_to_be16(crc_t10dif(daddr + j,
- dev->dev_attrib.block_size));
+ sdt = paddr + j;
+
+ avail = min(block_size, dsg->length - offset);
+ crc = crc_t10dif(daddr + offset, avail);
+ if (avail < block_size) {
+ kunmap_atomic(daddr);
+ dsg = sg_next(dsg);
+ daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
+ offset = block_size - avail;
+ crc = crc_t10dif_update(crc, daddr, offset);
+ } else {
+ offset += block_size;
+ }
+
+ sdt->guard_tag = cpu_to_be16(crc);
if (cmd->prot_type == TARGET_DIF_TYPE1_PROT)
sdt->ref_tag = cpu_to_be32(sector & 0xffffffff);
sdt->app_tag = 0;
@@ -1215,26 +1231,23 @@ sbc_dif_generate(struct se_cmd *cmd)
be32_to_cpu(sdt->ref_tag));
sector++;
- offset += sizeof(struct se_dif_v1_tuple);
}
- kunmap_atomic(paddr);
kunmap_atomic(daddr);
+ kunmap_atomic(paddr);
}
}
static sense_reason_t
sbc_dif_v1_verify(struct se_cmd *cmd, struct se_dif_v1_tuple *sdt,
- const void *p, sector_t sector, unsigned int ei_lba)
+ __u16 crc, sector_t sector, unsigned int ei_lba)
{
- struct se_device *dev = cmd->se_dev;
- int block_size = dev->dev_attrib.block_size;
__be16 csum;
if (!(cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
goto check_ref;
- csum = cpu_to_be16(crc_t10dif(p, block_size));
+ csum = cpu_to_be16(crc);
if (sdt->guard_tag != csum) {
pr_err("DIFv1 checksum failed on sector %llu guard tag 0x%04x"
@@ -1316,26 +1329,32 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
{
struct se_device *dev = cmd->se_dev;
struct se_dif_v1_tuple *sdt;
- struct scatterlist *dsg;
+ struct scatterlist *dsg = cmd->t_data_sg;
sector_t sector = start;
void *daddr, *paddr;
- int i, j;
+ int i;
sense_reason_t rc;
+ int dsg_off = 0;
+ unsigned int block_size = dev->dev_attrib.block_size;
- for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) {
- daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
+ for (; psg && sector < start + sectors; psg = sg_next(psg)) {
paddr = kmap_atomic(sg_page(psg)) + psg->offset;
+ daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
- for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) {
-
- if (psg_off >= psg->length) {
- kunmap_atomic(paddr - psg->offset);
- psg = sg_next(psg);
- paddr = kmap_atomic(sg_page(psg)) + psg->offset;
- psg_off = 0;
+ for (i = psg_off; i < psg->length &&
+ sector < start + sectors;
+ i += sizeof(struct se_dif_v1_tuple)) {
+ __u16 crc;
+ unsigned int avail;
+
+ if (dsg_off >= dsg->length) {
+ dsg_off -= dsg->length;
+ kunmap_atomic(daddr);
+ dsg = sg_next(dsg);
+ daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
}
- sdt = paddr + psg_off;
+ sdt = paddr + i;
pr_debug("DIF READ sector: %llu guard_tag: 0x%04x"
" app_tag: 0x%04x ref_tag: %u\n",
@@ -1343,27 +1362,38 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
sdt->app_tag, be32_to_cpu(sdt->ref_tag));
if (sdt->app_tag == cpu_to_be16(0xffff)) {
- sector++;
- psg_off += sizeof(struct se_dif_v1_tuple);
- continue;
+ dsg_off += block_size;
+ goto next;
+ }
+
+ avail = min(block_size, dsg->length - dsg_off);
+
+ crc = crc_t10dif(daddr + dsg_off, avail);
+ if (avail < block_size) {
+ kunmap_atomic(daddr);
+ dsg = sg_next(dsg);
+ daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
+ dsg_off = block_size - avail;
+ crc = crc_t10dif_update(crc, daddr, dsg_off);
+ } else {
+ dsg_off += block_size;
}
- rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector,
- ei_lba);
+ rc = sbc_dif_v1_verify(cmd, sdt, crc, sector, ei_lba);
if (rc) {
- kunmap_atomic(paddr - psg->offset);
kunmap_atomic(daddr - dsg->offset);
+ kunmap_atomic(paddr - psg->offset);
cmd->bad_sector = sector;
return rc;
}
-
+next:
sector++;
ei_lba++;
- psg_off += sizeof(struct se_dif_v1_tuple);
}
- kunmap_atomic(paddr - psg->offset);
+ psg_off = 0;
kunmap_atomic(daddr - dsg->offset);
+ kunmap_atomic(paddr - psg->offset);
}
return 0;
--
1.9.1
On 4/25/2015 5:33 PM, Akinobu Mita wrote:
> sbc_dif_generate() and sbc_dif_verify() currently assume that each
> SG element for data transfer memory doesn't straddle the block size
> boundary.
>
> However, when using SG_IO ioctl, we can choose the data transfer
> memory which doesn't satisfy that alignment requirement.
>
> In order to handle such cases correctly, this change inverts the outer
> loop to iterate data transfer memory and the inner loop to iterate
> protection information and enables to calculate CRC for a block which
> straddles multiple SG elements.
>
> Signed-off-by: Akinobu Mita <[email protected]>
> Cc: Tim Chen <[email protected]>
> Cc: Herbert Xu <[email protected]>
> Cc: "David S. Miller" <[email protected]>
> Cc: [email protected]
> Cc: Nicholas Bellinger <[email protected]>
> Cc: Sagi Grimberg <[email protected]>
> Cc: "Martin K. Petersen" <[email protected]>
> Cc: Christoph Hellwig <[email protected]>
> Cc: "James E.J. Bottomley" <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> ---
> * Changes from v2:
> - Handle odd SG mapping correctly instead of giving up
>
> drivers/target/target_core_sbc.c | 108 +++++++++++++++++++++++++--------------
> 1 file changed, 69 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
> index edba39f..33d2426 100644
> --- a/drivers/target/target_core_sbc.c
> +++ b/drivers/target/target_core_sbc.c
> @@ -1182,27 +1182,43 @@ sbc_dif_generate(struct se_cmd *cmd)
> {
> struct se_device *dev = cmd->se_dev;
> struct se_dif_v1_tuple *sdt;
> - struct scatterlist *dsg, *psg = cmd->t_prot_sg;
> + struct scatterlist *dsg = cmd->t_data_sg, *psg;
> sector_t sector = cmd->t_task_lba;
> void *daddr, *paddr;
> int i, j, offset = 0;
> + unsigned int block_size = dev->dev_attrib.block_size;
>
> - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) {
> - daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
> + for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) {
> paddr = kmap_atomic(sg_page(psg)) + psg->offset;
> + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
>
> - for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) {
> + for (j = 0; j < psg->length;
> + j += sizeof(struct se_dif_v1_tuple)) {
> + __u16 crc = 0;
> + unsigned int avail;
>
> - if (offset >= psg->length) {
> - kunmap_atomic(paddr);
> - psg = sg_next(psg);
> - paddr = kmap_atomic(sg_page(psg)) + psg->offset;
> - offset = 0;
> + if (offset >= dsg->length) {
> + offset -= dsg->length;
> + kunmap_atomic(daddr);
This unmap is inconsistent. You need to unmap (daddr - dsg->offset).
This applies throughout the patch.
> + dsg = sg_next(dsg);
> + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
> }
>
> - sdt = paddr + offset;
> - sdt->guard_tag = cpu_to_be16(crc_t10dif(daddr + j,
> - dev->dev_attrib.block_size));
> + sdt = paddr + j;
> +
> + avail = min(block_size, dsg->length - offset);
> + crc = crc_t10dif(daddr + offset, avail);
> + if (avail < block_size) {
> + kunmap_atomic(daddr);
> + dsg = sg_next(dsg);
> + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
> + offset = block_size - avail;
> + crc = crc_t10dif_update(crc, daddr, offset);
> + } else {
> + offset += block_size;
> + }
> +
> + sdt->guard_tag = cpu_to_be16(crc);
> if (cmd->prot_type == TARGET_DIF_TYPE1_PROT)
> sdt->ref_tag = cpu_to_be32(sector & 0xffffffff);
> sdt->app_tag = 0;
> @@ -1215,26 +1231,23 @@ sbc_dif_generate(struct se_cmd *cmd)
> be32_to_cpu(sdt->ref_tag));
>
> sector++;
> - offset += sizeof(struct se_dif_v1_tuple);
> }
>
> - kunmap_atomic(paddr);
> kunmap_atomic(daddr);
> + kunmap_atomic(paddr);
> }
> }
>
> static sense_reason_t
> sbc_dif_v1_verify(struct se_cmd *cmd, struct se_dif_v1_tuple *sdt,
> - const void *p, sector_t sector, unsigned int ei_lba)
> + __u16 crc, sector_t sector, unsigned int ei_lba)
> {
> - struct se_device *dev = cmd->se_dev;
> - int block_size = dev->dev_attrib.block_size;
> __be16 csum;
>
> if (!(cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
> goto check_ref;
>
> - csum = cpu_to_be16(crc_t10dif(p, block_size));
> + csum = cpu_to_be16(crc);
>
> if (sdt->guard_tag != csum) {
> pr_err("DIFv1 checksum failed on sector %llu guard tag 0x%04x"
> @@ -1316,26 +1329,32 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
> {
> struct se_device *dev = cmd->se_dev;
> struct se_dif_v1_tuple *sdt;
> - struct scatterlist *dsg;
> + struct scatterlist *dsg = cmd->t_data_sg;
> sector_t sector = start;
> void *daddr, *paddr;
> - int i, j;
> + int i;
> sense_reason_t rc;
> + int dsg_off = 0;
> + unsigned int block_size = dev->dev_attrib.block_size;
>
> - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) {
> - daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
> + for (; psg && sector < start + sectors; psg = sg_next(psg)) {
> paddr = kmap_atomic(sg_page(psg)) + psg->offset;
> + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
>
> - for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) {
> -
> - if (psg_off >= psg->length) {
> - kunmap_atomic(paddr - psg->offset);
> - psg = sg_next(psg);
> - paddr = kmap_atomic(sg_page(psg)) + psg->offset;
> - psg_off = 0;
> + for (i = psg_off; i < psg->length &&
> + sector < start + sectors;
> + i += sizeof(struct se_dif_v1_tuple)) {
> + __u16 crc;
> + unsigned int avail;
> +
> + if (dsg_off >= dsg->length) {
> + dsg_off -= dsg->length;
> + kunmap_atomic(daddr);
> + dsg = sg_next(dsg);
> + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
> }
>
> - sdt = paddr + psg_off;
> + sdt = paddr + i;
>
> pr_debug("DIF READ sector: %llu guard_tag: 0x%04x"
> " app_tag: 0x%04x ref_tag: %u\n",
> @@ -1343,27 +1362,38 @@ sbc_dif_verify(struct se_cmd *cmd, sector_t start, unsigned int sectors,
> sdt->app_tag, be32_to_cpu(sdt->ref_tag));
>
> if (sdt->app_tag == cpu_to_be16(0xffff)) {
> - sector++;
> - psg_off += sizeof(struct se_dif_v1_tuple);
> - continue;
> + dsg_off += block_size;
> + goto next;
> + }
> +
> + avail = min(block_size, dsg->length - dsg_off);
> +
> + crc = crc_t10dif(daddr + dsg_off, avail);
> + if (avail < block_size) {
> + kunmap_atomic(daddr);
> + dsg = sg_next(dsg);
> + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
> + dsg_off = block_size - avail;
> + crc = crc_t10dif_update(crc, daddr, dsg_off);
> + } else {
> + dsg_off += block_size;
> }
>
> - rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector,
> - ei_lba);
> + rc = sbc_dif_v1_verify(cmd, sdt, crc, sector, ei_lba);
> if (rc) {
> - kunmap_atomic(paddr - psg->offset);
> kunmap_atomic(daddr - dsg->offset);
> + kunmap_atomic(paddr - psg->offset);
> cmd->bad_sector = sector;
> return rc;
> }
> -
> +next:
> sector++;
> ei_lba++;
> - psg_off += sizeof(struct se_dif_v1_tuple);
> }
>
> - kunmap_atomic(paddr - psg->offset);
> + psg_off = 0;
> kunmap_atomic(daddr - dsg->offset);
> + kunmap_atomic(paddr - psg->offset);
> }
>
> return 0;
>
On 4/25/2015 5:33 PM, Akinobu Mita wrote:
> This patchset aims to fix several problems related to T10-PI support.
>
> These patches can be applied on top of Sagi's "[v1] Simlify dif_verify
> routines and fixup fileio protection information code" patchset.
>
Hi Akinobu,
I have given this some more thought,
This set adds DIF protected WRITE_SAME support. I don't think this
will work with a real transport that offloads DIF computation (at least
not over RDMA - perhaps qlogic/emulex folks can comment on their
devices).
The problem is that the HBA does not have the write_same functionality
you introduce here, i.e. generate multiple same protection fields for a
single data block.
We can require the fabrics drivers to do that (probably compensate in
SW), or we can have the write_same logic live only in the
core/backend code.
In this case, for WRITE_SAME, have the fabrics generate/verify a single
data block (one integrity field) like they do today, and then the core
will expand it to the correct number of sectors using some form of
sbc_dif_expand_same()
I'm not sure which way is better...
Martin? Nic? Thoughts?
2015-04-26 19:07 GMT+09:00 Sagi Grimberg <[email protected]>:
> On 4/25/2015 5:33 PM, Akinobu Mita wrote:
>>
>> sbc_dif_generate() and sbc_dif_verify() currently assume that each
>> SG element for data transfer memory doesn't straddle the block size
>> boundary.
>>
>> However, when using SG_IO ioctl, we can choose the data transfer
>> memory which doesn't satisfy that alignment requirement.
>>
>> In order to handle such cases correctly, this change inverts the outer
>> loop to iterate data transfer memory and the inner loop to iterate
>> protection information and enables to calculate CRC for a block which
>> straddles multiple SG elements.
>>
>> Signed-off-by: Akinobu Mita <[email protected]>
>> Cc: Tim Chen <[email protected]>
>> Cc: Herbert Xu <[email protected]>
>> Cc: "David S. Miller" <[email protected]>
>> Cc: [email protected]
>> Cc: Nicholas Bellinger <[email protected]>
>> Cc: Sagi Grimberg <[email protected]>
>> Cc: "Martin K. Petersen" <[email protected]>
>> Cc: Christoph Hellwig <[email protected]>
>> Cc: "James E.J. Bottomley" <[email protected]>
>> Cc: [email protected]
>> Cc: [email protected]
>> ---
>> * Changes from v2:
>> - Handle odd SG mapping correctly instead of giving up
>>
>> drivers/target/target_core_sbc.c | 108
>> +++++++++++++++++++++++++--------------
>> 1 file changed, 69 insertions(+), 39 deletions(-)
>>
>> diff --git a/drivers/target/target_core_sbc.c
>> b/drivers/target/target_core_sbc.c
>> index edba39f..33d2426 100644
>> --- a/drivers/target/target_core_sbc.c
>> +++ b/drivers/target/target_core_sbc.c
>> @@ -1182,27 +1182,43 @@ sbc_dif_generate(struct se_cmd *cmd)
>> {
>> struct se_device *dev = cmd->se_dev;
>> struct se_dif_v1_tuple *sdt;
>> - struct scatterlist *dsg, *psg = cmd->t_prot_sg;
>> + struct scatterlist *dsg = cmd->t_data_sg, *psg;
>> sector_t sector = cmd->t_task_lba;
>> void *daddr, *paddr;
>> int i, j, offset = 0;
>> + unsigned int block_size = dev->dev_attrib.block_size;
>>
>> - for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) {
>> - daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
>> + for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) {
>> paddr = kmap_atomic(sg_page(psg)) + psg->offset;
>> + daddr = kmap_atomic(sg_page(dsg)) + dsg->offset;
>>
>> - for (j = 0; j < dsg->length; j +=
>> dev->dev_attrib.block_size) {
>> + for (j = 0; j < psg->length;
>> + j += sizeof(struct se_dif_v1_tuple)) {
>> + __u16 crc = 0;
>> + unsigned int avail;
>>
>> - if (offset >= psg->length) {
>> - kunmap_atomic(paddr);
>> - psg = sg_next(psg);
>> - paddr = kmap_atomic(sg_page(psg)) +
>> psg->offset;
>> - offset = 0;
>> + if (offset >= dsg->length) {
>> + offset -= dsg->length;
>> + kunmap_atomic(daddr);
>
>
> This unmap is inconsistent. You need to unmap (daddr - dsg->offset).
>
> This applies throughout the patch.
Thanks for pointing out. I'll fix them all.
>>>>> "Akinobu" == Akinobu Mita <[email protected]> writes:
Akinobu> This introduces crc_t10dif_update() which enables to calculate
Akinobu> CRC for a block which straddles multiple SG elements by calling
Akinobu> multiple times.
Looks good. We need this for the initiator side too.
Acked-by: Martin K. Petersen <[email protected]>
--
Martin K. Petersen Oracle Linux Engineering
>>>>> "Sagi" == Sagi Grimberg <[email protected]> writes:
Sagi> The problem is that the HBA does not have the write_same
Sagi> functionality you introduce here, i.e. generate multiple same
Sagi> protection fields for a single data block.
Adding support to DIX would be problematic since it would essentially
turn a WRITE SAME into a WRITE. You'd only do one block of DMA but you'd
get N blocks going over the wire.
In target mode it is conceivable to set up a prot sgl after parsing the
CDB and let the HBA do the work. But I'm not aware of any hardware that
allows that.
Sagi> In this case, for WRITE_SAME, have the fabrics generate/verify a
Sagi> single data block (one integrity field) like they do today, and
Sagi> then the core will expand it to the correct number of sectors
Sagi> using some form of sbc_dif_expand_same()
Yeah. In a simple world you'd just keep overriding the ref tag in the
received PI tuple. But for performance reasons you'll obviously want to
do I/O in units bigger than a single block. Blindly preallocating PI to
fit the entire I/O is also problematic, however, since a block count
of 0 unfortunately means "the whole disk".
--
Martin K. Petersen Oracle Linux Engineering
On 4/28/2015 2:50 AM, Martin K. Petersen wrote:
>>>>>> "Sagi" == Sagi Grimberg <[email protected]> writes:
>
> Sagi> The problem is that the HBA does not have the write_same
> Sagi> functionality you introduce here, i.e. generate multiple same
> Sagi> protection fields for a single data block.
>
> Adding support to DIX would be problematic since it would essentially
> turn a WRITE SAME into a WRITE. You'd only do one block of DMA but you'd
> get N blocks going over the wire.
I thought that WRITE_SAME with DIX would include PI for the block that
is being sent over the wire, the initiator and target HBAs will verify
the single block integrity and the target backend will expand the PI
for the number of same sectors involved (unless the target backend
includes another wire, in which case it should handle it like the
initiator...)
>
> In target mode it is conceivable to set up a prot sgl after parsing the
> CDB and let the HBA do the work. But I'm not aware of any hardware that
> allows that.
I don't either; I think it would be simpler to have the target core
implement it instead of having each fabric driver do the same thing.
>
> Sagi> In this case, for WRITE_SAME, have the fabrics generate/verify a
> Sagi> single data block (one integrity field) like they do today, and
> Sagi> then the core will expand it to the correct number of sectors
> Sagi> using some form of sbc_dif_expand_same()
>
> Yeah. In a simple world you'd just keep overriding the ref tag in the
> received PI tuple. But for performance reasons you'll obviously want to
> do I/O in units bigger than a single block. Blindly preallocating PI to
> fit the entire I/O is also be problematic, however, since a block count
> of 0 unfortunately means "the whole disk".
>
It seems that the only one that can handle write_same PI expansion is
the backend.
The initiator can pass PI for the block that is transferred, and the
target is responsible to handle it. The target will also pass this
single block with PI to it's backend. The backend is responsible to
update PI for all the sectors that are written.
Sounds right?
Sagi.
On Sat, 2015-04-25 at 23:33 +0900, Akinobu Mita wrote:
> This introduces crc_t10dif_update() which enables to calculate CRC
> for a block which straddles multiple SG elements by calling multiple
> times.
>
> Signed-off-by: Akinobu Mita <[email protected]>
> Cc: Tim Chen <[email protected]>
> Cc: Herbert Xu <[email protected]>
> Cc: "David S. Miller" <[email protected]>
> Cc: [email protected]
> Cc: Nicholas Bellinger <[email protected]>
> Cc: Sagi Grimberg <[email protected]>
> Cc: "Martin K. Petersen" <[email protected]>
> Cc: Christoph Hellwig <[email protected]>
> Cc: "James E.J. Bottomley" <[email protected]>
> Cc: [email protected]
> ---
> * New patch from v3
>
> include/linux/crc-t10dif.h | 1 +
> lib/crc-t10dif.c | 23 +++++++++++++++++++++++
> 2 files changed, 24 insertions(+)
>
> diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
> index cf53d07..d81961e 100644
> --- a/include/linux/crc-t10dif.h
> +++ b/include/linux/crc-t10dif.h
> @@ -9,5 +9,6 @@
> extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer,
> size_t len);
> extern __u16 crc_t10dif(unsigned char const *, size_t);
> +extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t);
>
> #endif
> diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
> index dfe6ec1..7cdbe2e 100644
> --- a/lib/crc-t10dif.c
> +++ b/lib/crc-t10dif.c
> @@ -19,6 +19,29 @@
> static struct crypto_shash *crct10dif_tfm;
> static struct static_key crct10dif_fallback __read_mostly;
>
> +__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
> +{
> + struct {
> + struct shash_desc shash;
> + char ctx[2];
> + } desc;
> + int err;
> +
> + if (static_key_false(&crct10dif_fallback))
> + return crc_t10dif_generic(crc, buffer, len);
> +
> + desc.shash.tfm = crct10dif_tfm;
> + desc.shash.flags = 0;
> +
> + err = crypto_shash_import(&desc.shash, &crc);
> + BUG_ON(err);
> + err = crypto_shash_update(&desc.shash, buffer, len);
> + BUG_ON(err);
> +
> + return *(__u16 *)desc.ctx;
> +}
> +EXPORT_SYMBOL(crc_t10dif_update);
> +
> __u16 crc_t10dif(const unsigned char *buffer, size_t len)
> {
> struct {
There are a lot of duplicated code between crc_t10dif_update and
crc_t10dif. The only difference is for the update function
we import the crc value. I will prefer that we consolidate the code
into a local inline function that crc_t10dif_update and
crc_t10dif invoke to get rid of all the duplication.
Probably something like:
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index dfe6ec1..0248f78 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -19,7 +19,7 @@
static struct crypto_shash *crct10dif_tfm;
static struct static_key crct10dif_fallback __read_mostly;
-__u16 crc_t10dif(const unsigned char *buffer, size_t len)
+static inline __u16 __crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len, bool update)
{
struct {
struct shash_desc shash;
@@ -28,17 +28,33 @@ __u16 crc_t10dif(const unsigned char *buffer, size_t len)
int err;
if (static_key_false(&crct10dif_fallback))
- return crc_t10dif_generic(0, buffer, len);
+ return crc_t10dif_generic(crc, buffer, len);
desc.shash.tfm = crct10dif_tfm;
desc.shash.flags = 0;
- *(__u16 *)desc.ctx = 0;
+
+ if (update) {
+ err = crypto_shash_import(&desc.shash, &crc);
+ BUG_ON(err);
+ } else
+ *(__u16 *)desc.ctx = 0;
err = crypto_shash_update(&desc.shash, buffer, len);
BUG_ON(err);
return *(__u16 *)desc.ctx;
}
+
+__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
+{
+ return __crc_t10dif_update(crc, buffer, len, true);
+}
+EXPORT_SYMBOL(crc_t10dif_update);
+
+__u16 crc_t10dif(const unsigned char *buffer, size_t len)
+{
+ return __crc_t10dif_update(0, buffer, len, false);
+}
EXPORT_SYMBOL(crc_t10dif);
Thanks.
Tim
>>>>> "Sagi" == Sagi Grimberg <[email protected]> writes:
Sagi,
Sagi> I thought that WRITE_SAME with DIX would include PI for the block
Sagi> that is being sent over the wire, the initiator and target HBAs
Sagi> will verify the single block integrity and the target backend will
Sagi> expand the PI for the number of same sectors involved (unless the
Sagi> target backend includes another wire, in this case it should
Sagi> handle it like the initiator...)
Yep. I'm just saying there's nothing to be done wrt. DIX and WRITE SAME
on the initiator side. If you were to do something special it would
effectively mean turning WRITE SAME into a WRITE which kind of defeats
the purpose.
>> In target mode it is conceivable to set up a prot sgl after parsing
>> the CDB and let the HBA do the work. But I'm not aware of any
>> hardware that allows that.
Sagi> I don't either, I think it would be simpler to have the target
Sagi> core implement it instead of having each fabric driver doing the
Sagi> same thing.
Yep.
Sagi> The initiator can pass PI for the block that is transferred, and
Sagi> the target is responsible to handle it. The target will also pass
Sagi> this single block with PI to it's backend. The backend is
Sagi> responsible to update PI for all the sectors that are written.
Sagi> Sounds right?
Yes, I agree.
--
Martin K. Petersen Oracle Linux Engineering
>>>>> "Tim" == Tim Chen <[email protected]> writes:
Tim> There are a lot of duplicated code between crc_t10dif_update and
Tim> crc_t10dif. The only difference is for the update function we
Tim> import the crc value. I will prefer that we consolidate the code
Tim> into a local inline function that crc_t10dif_update and crc_t10dif
Tim> invoke to get rid of all the duplication.
I'm OK with that approach.
--
Martin K. Petersen Oracle Linux Engineering
2015-04-29 2:38 GMT+09:00 Tim Chen <[email protected]>:
> There are a lot of duplicated code between crc_t10dif_update and
> crc_t10dif. The only difference is for the update function
> we import the crc value. I will prefer that we consolidate the code
> into a local inline function that crc_t10dif_update and
> crc_t10dif invoke to get rid of all the duplication.
>
> Probably something like:
Looks good. I'll take this code.
> diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
> index dfe6ec1..0248f78 100644
> --- a/lib/crc-t10dif.c
> +++ b/lib/crc-t10dif.c
> @@ -19,7 +19,7 @@
> static struct crypto_shash *crct10dif_tfm;
> static struct static_key crct10dif_fallback __read_mostly;
>
> -__u16 crc_t10dif(const unsigned char *buffer, size_t len)
> +static inline __u16 __crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len, bool update)
> {
> struct {
> struct shash_desc shash;
> @@ -28,17 +28,33 @@ __u16 crc_t10dif(const unsigned char *buffer, size_t len)
> int err;
>
> if (static_key_false(&crct10dif_fallback))
> - return crc_t10dif_generic(0, buffer, len);
> + return crc_t10dif_generic(crc, buffer, len);
>
> desc.shash.tfm = crct10dif_tfm;
> desc.shash.flags = 0;
> - *(__u16 *)desc.ctx = 0;
> +
> + if (update) {
> + err = crypto_shash_import(&desc.shash, &crc);
> + BUG_ON(err);
> + } else
> + *(__u16 *)desc.ctx = 0;
>
> err = crypto_shash_update(&desc.shash, buffer, len);
> BUG_ON(err);
>
> return *(__u16 *)desc.ctx;
> }
> +
> +__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
> +{
> + return __crc_t10dif_update(crc, buffer, len, true);
> +}
> +EXPORT_SYMBOL(crc_t10dif_update);
> +
> +__u16 crc_t10dif(const unsigned char *buffer, size_t len)
> +{
> + return __crc_t10dif_update(0, buffer, len, false);
> +}
> EXPORT_SYMBOL(crc_t10dif);
>
>
> Thanks.
>
> Tim
>
On Tue, Apr 28, 2015 at 10:38:36AM -0700, Tim Chen wrote:
>
> + if (update) {
> + err = crypto_shash_import(&desc.shash, &crc);
> + BUG_ON(err);
You don't even have to make this conditional. Just always do
the import since it's just doing a memcpy anyway.
Cheers,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Wed, 2015-04-29 at 08:49 +0800, Herbert Xu wrote:
> On Tue, Apr 28, 2015 at 10:38:36AM -0700, Tim Chen wrote:
> >
> > + if (update) {
> > + err = crypto_shash_import(&desc.shash, &crc);
> > + BUG_ON(err);
>
> You don't even have to make this conditional. Just always do
> the import since it's just doing a memcpy anyway.
>
Cool, this will simplify things more :)
Tim