SCSI device has max_xfer_size and opt_xfer_size,
but current kernel uses only opt_xfer_size.
It causes the limitation on setting IO chunk size,
although it can support larger one.
So, I propose this patch to use max_xfer_size in case it has valid value.
It can support to use the larger chunk IO on SCSI device.
For example,
This patch is effective in case of some SCSI device like UFS
with opt_xfer_size 512KB, queue depth 32 and max_xfer_size over 512KB.
I expect both the performance improvement
and the efficiency use of smaller command queue depth.
Signed-off-by: Manjong Lee <[email protected]>
---
drivers/scsi/sd.c | 56 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 52 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 679c2c025047..de59f01c1304 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3108,6 +3108,53 @@ static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer)
sdkp->security = 1;
}
+static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
+ unsigned int dev_max)
+{
+ struct scsi_device *sdp = sdkp->device;
+ unsigned int max_xfer_bytes =
+ logical_to_bytes(sdp, sdkp->max_xfer_blocks);
+
+ if (sdkp->max_xfer_blocks == 0)
+ return false;
+
+ if (sdkp->max_xfer_blocks > SD_MAX_XFER_BLOCKS) {
+ sd_first_printk(KERN_WARNING, sdkp,
+ "Maximal transfer size %u logical blocks " \
+ "> sd driver limit (%u logical blocks)\n",
+ sdkp->max_xfer_blocks, SD_DEF_XFER_BLOCKS);
+ return false;
+ }
+
+ if (sdkp->max_xfer_blocks > dev_max) {
+ sd_first_printk(KERN_WARNING, sdkp,
+ "Maximal transfer size %u logical blocks "
+ "> dev_max (%u logical blocks)\n",
+ sdkp->max_xfer_blocks, dev_max);
+ return false;
+ }
+
+ if (max_xfer_bytes < PAGE_SIZE) {
+ sd_first_printk(KERN_WARNING, sdkp,
+ "Maximal transfer size %u bytes < " \
+ "PAGE_SIZE (%u bytes)\n",
+ max_xfer_bytes, (unsigned int)PAGE_SIZE);
+ return false;
+ }
+
+ if (max_xfer_bytes & (sdkp->physical_block_size - 1)) {
+ sd_first_printk(KERN_WARNING, sdkp,
+ "Maximal transfer size %u bytes not a " \
+ "multiple of physical block size (%u bytes)\n",
+ max_xfer_bytes, sdkp->physical_block_size);
+ return false;
+ }
+
+ sd_first_printk(KERN_INFO, sdkp, "Maximal transfer size %u bytes\n",
+ max_xfer_bytes);
+ return true;
+}
+
/*
* Determine the device's preferred I/O size for reads and writes
* unless the reported value is unreasonably small, large, not a
@@ -3233,12 +3280,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
/* Initial block count limit based on CDB TRANSFER LENGTH field size. */
dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
-
- /* Some devices report a maximum block count for READ/WRITE requests. */
- dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
- if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
+ if (sd_validate_max_xfer_size(sdkp, dev_max)) {
+ q->limits.io_opt = 0;
+ rw_max = logical_to_sectors(sdp, sdkp->max_xfer_blocks);
+ q->limits.max_dev_sectors = rw_max;
+ } else if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else {
--
2.29.0
Add recipients for more reviews.
>SCSI device has max_xfer_size and opt_xfer_size,
>but current kernel uses only opt_xfer_size.
>
>It causes the limitation on setting IO chunk size,
>although it can support larger one.
>
>So, I propose this patch to use max_xfer_size in case it has valid value.
>It can support to use the larger chunk IO on SCSI device.
>
>For example,
>This patch is effective in case of some SCSI device like UFS
>with opt_xfer_size 512KB, queue depth 32 and max_xfer_size over 512KB.
>
>I expect both the performance improvement
>and the efficiency use of smaller command queue depth.
>
>Signed-off-by: Manjong Lee <[email protected]>
>---
>drivers/scsi/sd.c | 56 +++++++++++++++++++++++++++++++++++++++++++----
>1 file changed, 52 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>index 679c2c025047..de59f01c1304 100644
>--- a/drivers/scsi/sd.c
>+++ b/drivers/scsi/sd.c
>@@ -3108,6 +3108,53 @@ static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer)
>sdkp->security = 1;
>}
>
>+static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
>+ unsigned int dev_max)
>+{
>+ struct scsi_device *sdp = sdkp->device;
>+ unsigned int max_xfer_bytes =
>+ logical_to_bytes(sdp, sdkp->max_xfer_blocks);
>+
>+ if (sdkp->max_xfer_blocks == 0)
>+ return false;
>+
>+ if (sdkp->max_xfer_blocks > SD_MAX_XFER_BLOCKS) {
>+ sd_first_printk(KERN_WARNING, sdkp,
>+ "Maximal transfer size %u logical blocks " \
>+ "> sd driver limit (%u logical blocks)\n",
>+ sdkp->max_xfer_blocks, SD_DEF_XFER_BLOCKS);
>+ return false;
>+ }
>+
>+ if (sdkp->max_xfer_blocks > dev_max) {
>+ sd_first_printk(KERN_WARNING, sdkp,
>+ "Maximal transfer size %u logical blocks "
>+ "> dev_max (%u logical blocks)\n",
>+ sdkp->max_xfer_blocks, dev_max);
>+ return false;
>+ }
>+
>+ if (max_xfer_bytes < PAGE_SIZE) {
>+ sd_first_printk(KERN_WARNING, sdkp,
>+ "Maximal transfer size %u bytes < " \
>+ "PAGE_SIZE (%u bytes)\n",
>+ max_xfer_bytes, (unsigned int)PAGE_SIZE);
>+ return false;
>+ }
>+
>+ if (max_xfer_bytes & (sdkp->physical_block_size - 1)) {
>+ sd_first_printk(KERN_WARNING, sdkp,
>+ "Maximal transfer size %u bytes not a " \
>+ "multiple of physical block size (%u bytes)\n",
>+ max_xfer_bytes, sdkp->physical_block_size);
>+ return false;
>+ }
>+
>+ sd_first_printk(KERN_INFO, sdkp, "Maximal transfer size %u bytes\n",
>+ max_xfer_bytes);
>+ return true;
>+}
>+
>/*
>* Determine the device's preferred I/O size for reads and writes
>* unless the reported value is unreasonably small, large, not a
>@@ -3233,12 +3280,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
>
>/* Initial block count limit based on CDB TRANSFER LENGTH field size. */
>dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
>-
>- /* Some devices report a maximum block count for READ/WRITE requests. */
>- dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
>q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
>
>- if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
>+ if (sd_validate_max_xfer_size(sdkp, dev_max)) {
>+ q->limits.io_opt = 0;
>+ rw_max = logical_to_sectors(sdp, sdkp->max_xfer_blocks);
>+ q->limits.max_dev_sectors = rw_max;
>+ } else if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
>q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
>rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
>} else {
>--
>2.29.0
>
>
On 2021/01/20 15:45, Manjong Lee wrote:
> Add recipients for more reviews.
Please resend instead of replying to your own patch. The reply quoting corrupts
the patch.
The patch title is very long.
>
>> SCSI device has max_xfer_size and opt_xfer_size,
>> but current kernel uses only opt_xfer_size.
>>
>> It causes the limitation on setting IO chunk size,
>> although it can support larger one.
>>
>> So, I propose this patch to use max_xfer_size in case it has valid value.
>> It can support to use the larger chunk IO on SCSI device.
>>
>> For example,
>> This patch is effective in case of some SCSI device like UFS
>> with opt_xfer_size 512KB, queue depth 32 and max_xfer_size over 512KB.
>>
>> I expect both the performance improvement
>> and the efficiency use of smaller command queue depth.
This can be measured, and this commit message should include results to show how
effective this change is.
>>
>> Signed-off-by: Manjong Lee <[email protected]>
>> ---
>> drivers/scsi/sd.c | 56 +++++++++++++++++++++++++++++++++++++++++++----
>> 1 file changed, 52 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>> index 679c2c025047..de59f01c1304 100644
>> --- a/drivers/scsi/sd.c
>> +++ b/drivers/scsi/sd.c
>> @@ -3108,6 +3108,53 @@ static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer)
>> sdkp->security = 1;
>> }
>>
>> +static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
>> + unsigned int dev_max)
>> +{
>> + struct scsi_device *sdp = sdkp->device;
>> + unsigned int max_xfer_bytes =
>> + logical_to_bytes(sdp, sdkp->max_xfer_blocks);
>> +
>> + if (sdkp->max_xfer_blocks == 0)
>> + return false;
>> +
>> + if (sdkp->max_xfer_blocks > SD_MAX_XFER_BLOCKS) {
>> + sd_first_printk(KERN_WARNING, sdkp,
>> + "Maximal transfer size %u logical blocks " \
>> + "> sd driver limit (%u logical blocks)\n",
>> + sdkp->max_xfer_blocks, SD_DEF_XFER_BLOCKS);
>> + return false;
>> + }
>> +
>> + if (sdkp->max_xfer_blocks > dev_max) {
>> + sd_first_printk(KERN_WARNING, sdkp,
>> + "Maximal transfer size %u logical blocks "
>> + "> dev_max (%u logical blocks)\n",
>> + sdkp->max_xfer_blocks, dev_max);
>> + return false;
>> + }
>> +
>> + if (max_xfer_bytes < PAGE_SIZE) {
>> + sd_first_printk(KERN_WARNING, sdkp,
>> + "Maximal transfer size %u bytes < " \
>> + "PAGE_SIZE (%u bytes)\n",
>> + max_xfer_bytes, (unsigned int)PAGE_SIZE);
>> + return false;
>> + }
>> +
>> + if (max_xfer_bytes & (sdkp->physical_block_size - 1)) {
>> + sd_first_printk(KERN_WARNING, sdkp,
>> + "Maximal transfer size %u bytes not a " \
>> + "multiple of physical block size (%u bytes)\n",
>> + max_xfer_bytes, sdkp->physical_block_size);
>> + return false;
>> + }
>> +
>> + sd_first_printk(KERN_INFO, sdkp, "Maximal transfer size %u bytes\n",
>> + max_xfer_bytes);
>> + return true;
>> +}
Except for the order of the comparisons against SD_MAX_XFER_BLOCKS and dev_max,
this function looks identical to sd_validate_opt_xfer_size(), modulo the use of
max_xfer_blocks instead of opt_xfer_blocks. Can't you turn this into something like:
static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
const char *name,
unsigned int xfer_blocks,
unsigned int dev_max)
To allow checking both opt_xfer_blocks and max_xfer_blocks ?
>> +
>> /*
>> * Determine the device's preferred I/O size for reads and writes
>> * unless the reported value is unreasonably small, large, not a
>> @@ -3233,12 +3280,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
>>
>> /* Initial block count limit based on CDB TRANSFER LENGTH field size. */
>> dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
This looks weird: no indentation. Care to resend ?
>> -
>> - /* Some devices report a maximum block count for READ/WRITE requests. */
>> - dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
>> q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
>>
>> - if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
>> + if (sd_validate_max_xfer_size(sdkp, dev_max)) {
>> + q->limits.io_opt = 0;
>> + rw_max = logical_to_sectors(sdp, sdkp->max_xfer_blocks);
>> + q->limits.max_dev_sectors = rw_max;
>> + } else if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
This does not look correct to me. This renders the device reported
opt_xfer_blocks useless.
The unmodified code sets dev_max to the min of SD_MAX_XFER_BLOCKS or
SD_DEF_XFER_BLOCKS and of the device reported max_xfer_blocks. The result of
this is used as the device max_dev_sectors queue limit, which in turn is used to
set the max_hw_sectors queue limit accounting for the adapter limits too.
opt_xfer_blocks, if it is valid, will be used to set the io_opt queue limit,
which is a hint. This hint is used to optimize the "soft" max_sectors command
limit used by the block layer to limit command size if the value of
opt_xfer_blocks is smaller than the limit initially set with max_xfer_blocks.
So if for your device max_sectors end up being too small, it is likely because
the device itself is reporting an opt_xfer_blocks value that is too small for
its own good. The max_sectors limit can be manually increased with "echo xxx >
/sys/block/sdX/queue/max_sectors_kb". A udev rule can be used to handle this
autmatically if needed.
But to get a saner default for that device, I do not think that this patch is
the right solution. Ideally, the device peculiarity should be handled with a
quirk, but that is not used in scsi. So beside the udev rule trick, I am not
sure what the right approach is here.
>> q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
>> rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
>> } else {
>> --
>> 2.29.0
>>
>>
>
--
Damien Le Moal
Western Digital Research
> On 2021/01/20 15:45, Manjong Lee wrote:
> > Add recipients for more reviews.
>
> Please resend instead of replying to your own patch. The reply quoting corrupts
> the patch.
>
> The patch title is very long.
>
> >
> >> SCSI device has max_xfer_size and opt_xfer_size,
> >> but current kernel uses only opt_xfer_size.
> >>
> >> It causes the limitation on setting IO chunk size,
> >> although it can support larger one.
> >>
> >> So, I propose this patch to use max_xfer_size in case it has valid value.
> >> It can support to use the larger chunk IO on SCSI device.
> >>
> >> For example,
> >> This patch is effective in case of some SCSI device like UFS
> >> with opt_xfer_size 512KB, queue depth 32 and max_xfer_size over 512KB.
> >>
> >> I expect both the performance improvement
> >> and the efficiency use of smaller command queue depth.
>
> This can be measured, and this commit message should include results to show how
> effective this change is.
>
> >>
> >> Signed-off-by: Manjong Lee <[email protected]>
> >> ---
> >> drivers/scsi/sd.c | 56 +++++++++++++++++++++++++++++++++++++++++++----
> >> 1 file changed, 52 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> >> index 679c2c025047..de59f01c1304 100644
> >> --- a/drivers/scsi/sd.c
> >> +++ b/drivers/scsi/sd.c
> >> @@ -3108,6 +3108,53 @@ static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer)
> >> sdkp->security = 1;
> >> }
> >>
> >> +static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
> >> + unsigned int dev_max)
> >> +{
> >> + struct scsi_device *sdp = sdkp->device;
> >> + unsigned int max_xfer_bytes =
> >> + logical_to_bytes(sdp, sdkp->max_xfer_blocks);
> >> +
> >> + if (sdkp->max_xfer_blocks == 0)
> >> + return false;
> >> +
> >> + if (sdkp->max_xfer_blocks > SD_MAX_XFER_BLOCKS) {
> >> + sd_first_printk(KERN_WARNING, sdkp,
> >> + "Maximal transfer size %u logical blocks " \
> >> + "> sd driver limit (%u logical blocks)\n",
> >> + sdkp->max_xfer_blocks, SD_DEF_XFER_BLOCKS);
> >> + return false;
> >> + }
> >> +
> >> + if (sdkp->max_xfer_blocks > dev_max) {
> >> + sd_first_printk(KERN_WARNING, sdkp,
> >> + "Maximal transfer size %u logical blocks "
> >> + "> dev_max (%u logical blocks)\n",
> >> + sdkp->max_xfer_blocks, dev_max);
> >> + return false;
> >> + }
> >> +
> >> + if (max_xfer_bytes < PAGE_SIZE) {
> >> + sd_first_printk(KERN_WARNING, sdkp,
> >> + "Maximal transfer size %u bytes < " \
> >> + "PAGE_SIZE (%u bytes)\n",
> >> + max_xfer_bytes, (unsigned int)PAGE_SIZE);
> >> + return false;
> >> + }
> >> +
> >> + if (max_xfer_bytes & (sdkp->physical_block_size - 1)) {
> >> + sd_first_printk(KERN_WARNING, sdkp,
> >> + "Maximal transfer size %u bytes not a " \
> >> + "multiple of physical block size (%u bytes)\n",
> >> + max_xfer_bytes, sdkp->physical_block_size);
> >> + return false;
> >> + }
> >> +
> >> + sd_first_printk(KERN_INFO, sdkp, "Maximal transfer size %u bytes\n",
> >> + max_xfer_bytes);
> >> + return true;
> >> +}
>
> Except for the order of the comparisons against SD_MAX_XFER_BLOCKS and dev_max,
> this function looks identical to sd_validate_opt_xfer_size(), modulo the use of
> max_xfer_blocks instead of opt_xfer_blocks. Can't you turn this into something like:
>
> static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
> const char *name,
> unsigned int xfer_blocks,
> unsigned int dev_max)
>
> To allow checking both opt_xfer_blocks and max_xfer_blocks ?
>
> >> +
> >> /*
> >> * Determine the device's preferred I/O size for reads and writes
> >> * unless the reported value is unreasonably small, large, not a
> >> @@ -3233,12 +3280,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
> >>
> >> /* Initial block count limit based on CDB TRANSFER LENGTH field size. */
> >> dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
>
> This looks weird: no indentation. Care to resend ?
>
> >> -
> >> - /* Some devices report a maximum block count for READ/WRITE requests. */
> >> - dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
> >> q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
> >>
> >> - if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
> >> + if (sd_validate_max_xfer_size(sdkp, dev_max)) {
> >> + q->limits.io_opt = 0;
> >> + rw_max = logical_to_sectors(sdp, sdkp->max_xfer_blocks);
> >> + q->limits.max_dev_sectors = rw_max;
> >> + } else if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
>
> This does not look correct to me. This renders the device reported
> opt_xfer_blocks useless.
>
> The unmodified code sets dev_max to the min of SD_MAX_XFER_BLOCKS or
> SD_DEF_XFER_BLOCKS and of the device reported max_xfer_blocks. The result of
> this is used as the device max_dev_sectors queue limit, which in turn is used to
> set the max_hw_sectors queue limit accounting for the adapter limits too.
>
> opt_xfer_blocks, if it is valid, will be used to set the io_opt queue limit,
> which is a hint. This hint is used to optimize the "soft" max_sectors command
> limit used by the block layer to limit command size if the value of
> opt_xfer_blocks is smaller than the limit initially set with max_xfer_blocks.
>
> So if for your device max_sectors end up being too small, it is likely because
> the device itself is reporting an opt_xfer_blocks value that is too small for
> its own good. The max_sectors limit can be manually increased with "echo xxx >
> /sys/block/sdX/queue/max_sectors_kb". A udev rule can be used to handle this
> autmatically if needed.
>
> But to get a saner default for that device, I do not think that this patch is
> the right solution. Ideally, the device peculiarity should be handled with a
> quirk, but that is not used in scsi. So beside the udev rule trick, I am not
> sure what the right approach is here.
>
This approach is for using sdkp->max_xfer_blocks as a rw_max.
There are no way to use it now when sdkp->opt_xfer_blocks is valid.
In my case, scsi device reports both of sdkp->max_xfer_blocks, and
sdkp->opt_xfer_blocks.
How about set larger valid value between sdkp->max_xfer_blocks,
and sdkp->opt_xfer_blocks to rw_max?
> >> q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
> >> rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
> >> } else {
> >> --
> >> 2.29.0
> >>
> >>
> >
On 2021/01/22 16:24, Changheun Lee wrote:
>> On 2021/01/20 15:45, Manjong Lee wrote:
>>> Add recipients for more reviews.
>>
>> Please resend instead of replying to your own patch. The reply quoting corrupts
>> the patch.
>>
>> The patch title is very long.
>>
>>>
>>>> SCSI device has max_xfer_size and opt_xfer_size,
>>>> but current kernel uses only opt_xfer_size.
>>>>
>>>> It causes the limitation on setting IO chunk size,
>>>> although it can support larger one.
>>>>
>>>> So, I propose this patch to use max_xfer_size in case it has valid value.
>>>> It can support to use the larger chunk IO on SCSI device.
>>>>
>>>> For example,
>>>> This patch is effective in case of some SCSI device like UFS
>>>> with opt_xfer_size 512KB, queue depth 32 and max_xfer_size over 512KB.
>>>>
>>>> I expect both the performance improvement
>>>> and the efficiency use of smaller command queue depth.
>>
>> This can be measured, and this commit message should include results to show how
>> effective this change is.
>>
>>>>
>>>> Signed-off-by: Manjong Lee <[email protected]>
>>>> ---
>>>> drivers/scsi/sd.c | 56 +++++++++++++++++++++++++++++++++++++++++++----
>>>> 1 file changed, 52 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>>>> index 679c2c025047..de59f01c1304 100644
>>>> --- a/drivers/scsi/sd.c
>>>> +++ b/drivers/scsi/sd.c
>>>> @@ -3108,6 +3108,53 @@ static void sd_read_security(struct scsi_disk *sdkp, unsigned char *buffer)
>>>> sdkp->security = 1;
>>>> }
>>>>
>>>> +static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
>>>> + unsigned int dev_max)
>>>> +{
>>>> + struct scsi_device *sdp = sdkp->device;
>>>> + unsigned int max_xfer_bytes =
>>>> + logical_to_bytes(sdp, sdkp->max_xfer_blocks);
>>>> +
>>>> + if (sdkp->max_xfer_blocks == 0)
>>>> + return false;
>>>> +
>>>> + if (sdkp->max_xfer_blocks > SD_MAX_XFER_BLOCKS) {
>>>> + sd_first_printk(KERN_WARNING, sdkp,
>>>> + "Maximal transfer size %u logical blocks " \
>>>> + "> sd driver limit (%u logical blocks)\n",
>>>> + sdkp->max_xfer_blocks, SD_DEF_XFER_BLOCKS);
>>>> + return false;
>>>> + }
>>>> +
>>>> + if (sdkp->max_xfer_blocks > dev_max) {
>>>> + sd_first_printk(KERN_WARNING, sdkp,
>>>> + "Maximal transfer size %u logical blocks "
>>>> + "> dev_max (%u logical blocks)\n",
>>>> + sdkp->max_xfer_blocks, dev_max);
>>>> + return false;
>>>> + }
>>>> +
>>>> + if (max_xfer_bytes < PAGE_SIZE) {
>>>> + sd_first_printk(KERN_WARNING, sdkp,
>>>> + "Maximal transfer size %u bytes < " \
>>>> + "PAGE_SIZE (%u bytes)\n",
>>>> + max_xfer_bytes, (unsigned int)PAGE_SIZE);
>>>> + return false;
>>>> + }
>>>> +
>>>> + if (max_xfer_bytes & (sdkp->physical_block_size - 1)) {
>>>> + sd_first_printk(KERN_WARNING, sdkp,
>>>> + "Maximal transfer size %u bytes not a " \
>>>> + "multiple of physical block size (%u bytes)\n",
>>>> + max_xfer_bytes, sdkp->physical_block_size);
>>>> + return false;
>>>> + }
>>>> +
>>>> + sd_first_printk(KERN_INFO, sdkp, "Maximal transfer size %u bytes\n",
>>>> + max_xfer_bytes);
>>>> + return true;
>>>> +}
>>
>> Except for the order of the comparisons against SD_MAX_XFER_BLOCKS and dev_max,
>> this function looks identical to sd_validate_opt_xfer_size(), modulo the use of
>> max_xfer_blocks instead of opt_xfer_blocks. Can't you turn this into something like:
>>
>> static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
>> const char *name,
>> unsigned int xfer_blocks,
>> unsigned int dev_max)
>>
>> To allow checking both opt_xfer_blocks and max_xfer_blocks ?
>>
>>>> +
>>>> /*
>>>> * Determine the device's preferred I/O size for reads and writes
>>>> * unless the reported value is unreasonably small, large, not a
>>>> @@ -3233,12 +3280,13 @@ static int sd_revalidate_disk(struct gendisk *disk)
>>>>
>>>> /* Initial block count limit based on CDB TRANSFER LENGTH field size. */
>>>> dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS;
>>
>> This looks weird: no indentation. Care to resend ?
>>
>>>> -
>>>> - /* Some devices report a maximum block count for READ/WRITE requests. */
>>>> - dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks);
>>>> q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max);
>>>>
>>>> - if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
>>>> + if (sd_validate_max_xfer_size(sdkp, dev_max)) {
>>>> + q->limits.io_opt = 0;
>>>> + rw_max = logical_to_sectors(sdp, sdkp->max_xfer_blocks);
>>>> + q->limits.max_dev_sectors = rw_max;
>>>> + } else if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
>>
>> This does not look correct to me. This renders the device reported
>> opt_xfer_blocks useless.
>>
>> The unmodified code sets dev_max to the min of SD_MAX_XFER_BLOCKS or
>> SD_DEF_XFER_BLOCKS and of the device reported max_xfer_blocks. The result of
>> this is used as the device max_dev_sectors queue limit, which in turn is used to
>> set the max_hw_sectors queue limit accounting for the adapter limits too.
>>
>> opt_xfer_blocks, if it is valid, will be used to set the io_opt queue limit,
>> which is a hint. This hint is used to optimize the "soft" max_sectors command
>> limit used by the block layer to limit command size if the value of
>> opt_xfer_blocks is smaller than the limit initially set with max_xfer_blocks.
>>
>> So if for your device max_sectors end up being too small, it is likely because
>> the device itself is reporting an opt_xfer_blocks value that is too small for
>> its own good. The max_sectors limit can be manually increased with "echo xxx >
>> /sys/block/sdX/queue/max_sectors_kb". A udev rule can be used to handle this
>> autmatically if needed.
>>
>> But to get a saner default for that device, I do not think that this patch is
>> the right solution. Ideally, the device peculiarity should be handled with a
>> quirk, but that is not used in scsi. So beside the udev rule trick, I am not
>> sure what the right approach is here.
>>
>
> This approach is for using sdkp->max_xfer_blocks as a rw_max.
> There are no way to use it now when sdkp->opt_xfer_blocks is valid.
> In my case, scsi device reports both of sdkp->max_xfer_blocks, and
> sdkp->opt_xfer_blocks.
>
> How about set larger valid value between sdkp->max_xfer_blocks,
> and sdkp->opt_xfer_blocks to rw_max?
Again, if your device reports an opt_xfer_blocks value that is too small for its
own good, that is a problem with this device. The solution for that is not to
change something that will affect *all* other storage devices, including those
with a perfectly valid opt_xfer_blocks value.
I think that the solution should be at the LLD level, for that device only. But
I am not sure how to communicate a quirk for opt_xfer_blocks back to the generic
sd driver. You should explore a solution like that. Others may have ideas about
this too. Wait for more comments.
>
>>>> q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
>>>> rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
>>>> } else {
>>>> --
>>>> 2.29.0
>>>>
>>>>
>>>
>
--
Damien Le Moal
Western Digital Research
Damien,
>> How about set larger valid value between sdkp->max_xfer_blocks,
>> and sdkp->opt_xfer_blocks to rw_max?
>
> Again, if your device reports an opt_xfer_blocks value that is too
> small for its own good, that is a problem with this device.
Correct. It is very much intentional that we do not default to issuing
the largest commands supported by the physical hardware.
If the device is not reporting an optimal transfer size, and the block
layer default is too small, the solution is to adjust max_sectors_kb in
sysfs (by adding a udev rule, for instance).
--
Martin K. Petersen Oracle Linux Engineering
> Damien,
>
> >> How about set larger valid value between sdkp->max_xfer_blocks,
> >> and sdkp->opt_xfer_blocks to rw_max?
> >
> > Again, if your device reports an opt_xfer_blocks value that is too
> > small for its own good, that is a problem with this device.
>
> Correct. It is very much intentional that we do not default to issuing
> the largest commands supported by the physical hardware.
>
> If the device is not reporting an optimal transfer size, and the block
> layer default is too small, the solution is to adjust max_sectors_kb in
> sysfs (by adding a udev rule, for instance).
>
Sorry, I said wrong.
As others mentioned, it's device problem if opt_xfer_blocks is wrong.
kernel modification is not needed for it.
I want to discuss using max_xfer_blocks instead of opt_xfer_blocks as a optional.
For example, device reports opt_xfer_blocks is 512KB and 1MB as a
max_xfer_blocks too. Currently rw_max is set with 512KB only.
I want a option to select max_xfer_blocks for rw_max.
Are there any historical problem when rw_max is set with max_xfer_blocks?
How about below approach if max_xfer_blocks can be set to rw_max?
It's using queue flag as a option. Do you have good idea to suggest?
static bool sd_validate_max_xfer_size(struct scsi_disk *sdkp,
unsigned int dev_max)
{
struct request_queue *q = sdkp->disk->queue;
if (!blk_queue_allow_sd_rw_max(q))
return false;
if (sdkp->max_xfer_blocks == 0)
return false;
......
return true;
}
static int sd_revalidate_disk(struct gendisk *disk)
{
......
if (sd_validate_max_xfer_size(sdkp, dev_max)) {
q->limits.io_opt = logical_to_bytes(sdp, sdkp->max_xfer_blocks);
rw_max = logical_to_sectors(sdp, sdkp->max_xfer_blocks);
} else if (sd_validate_opt_xfer_size(sdkp, dev_max)) {
q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else {
......
}
> --
> Martin K. Petersen Oracle Linux Engineering
Hello Changheun!
> I want to discuss using max_xfer_blocks instead of opt_xfer_blocks as
> a optional. For example, device reports opt_xfer_blocks is 512KB and
> 1MB as a max_xfer_blocks too. Currently rw_max is set with 512KB only.
Because that's what the device asks for. If a device explicitly requests
us to use 512 KB I/Os we should not be sending it 1 MB requests.
The spec is very clear. It says that if you send a command *larger* than
opt_xfer_blocks, you should expect *slower* performance. That makes
max_xfer_blocks a particularly poor choice for setting the default I/O
size.
In addition to being slower, max_xfer_blocks could potentially also be
much, much larger than opt_xfer_blocks. I understand your 512 KB vs. 1
MB example. But if the max_xfer_blocks limit is reported as 1 GB, is
that then the right value to use instead of 512 KB? Probably not.
If a device does not report an opt_xfer_blocks value that suits your
workload, just override the resulting max_sectors_kb in sysfs. This is
intentionally a soft limit so it can be adjusted by the user without
having to change the kernel.
--
Martin K. Petersen Oracle Linux Engineering
> Hello Changheun!
>
> > I want to discuss using max_xfer_blocks instead of opt_xfer_blocks as
> > a optional. For example, device reports opt_xfer_blocks is 512KB and
> > 1MB as a max_xfer_blocks too. Currently rw_max is set with 512KB only.
>
> Because that's what the device asks for. If a device explicitly requests
> us to use 512 KB I/Os we should not be sending it 1 MB requests.
>
> The spec is very clear. It says that if you send a command *larger* than
> opt_xfer_blocks, you should expect *slower* performance. That makes
> max_xfer_blocks a particularly poor choice for setting the default I/O
> size.
>
> In addition to being slower, max_xfer_blocks could potentially also be
> much, much larger than opt_xfer_blocks. I understand your 512 KB vs. 1
> MB example. But if the max_xfer_blocks limit is reported as 1 GB, is
> that then the right value to use instead of 512 KB? Probably not.
>
> If a device does not report an opt_xfer_blocks value that suits your
> workload, just override the resulting max_sectors_kb in sysfs. This is
> intentionally a soft limit so it can be adjusted by the user without
> having to change the kernel.
>
> --
> Martin K. Petersen Oracle Linux Engineering
>
I understood what you said. I reviewed meaning of opt_xfer_blocks from
SCSI spec again. I think below is what you saw in spec.
The OPTIMAL TRANSFER LENGTH field indicates the optimal transfer size in
logical blocks for a single command shown in table 197. If a device server
receives one of these commands with a transfer size greater than this value,
then the device server may incur significant delays in processing the
command. If the OPTIMAL TRANSFER LENGTH field is set to zero, then there
is no reported optimal transfer size.
Thank you for kindly feedback. :)
---
Changheun Lee
Samsung Electronics