2024-01-23 19:49:32

by Jens Axboe

[permalink] [raw]
Subject: [PATCH] iov_iter: streamline iovec/bvec alignment iteration

Rewrite iov_iter_aligned_iovec() and iov_iter_aligned_bvec() to be both
easier to read, and also significantly more compact in terms of
generated code. This saves 178 bytes of text on x86-64 for me (with
clang-18) and 136 bytes on arm64 (with gcc-13).

In profiles, it also saves a bit of time for the same workload:

0.81% -0.18% [kernel.vmlinux] [k] iov_iter_aligned_bvec
0.48% -0.09% [kernel.vmlinux] [k] iov_iter_is_aligned

which is a nice side benefit as well.

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e0aa6b440ca5..2fcc47d822e3 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -714,12 +714,11 @@ EXPORT_SYMBOL(iov_iter_discard);
static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
+ const struct iovec *iov = iter_iov(i);
size_t size = i->count;
size_t skip = i->iov_offset;
- unsigned k;

- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- const struct iovec *iov = iter_iov(i) + k;
+ do {
size_t len = iov->iov_len - skip;

if (len > size)
@@ -729,34 +728,36 @@ static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
if ((unsigned long)(iov->iov_base + skip) & addr_mask)
return false;

+ iov++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size);
+
return true;
}

static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
- size_t size = i->count;
+ const struct bio_vec *bvec = i->bvec;
unsigned skip = i->iov_offset;
- unsigned k;
+ size_t size = i->count;

- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- size_t len = i->bvec[k].bv_len - skip;
+ do {
+ size_t len = bvec->bv_len;

if (len > size)
len = size;
if (len & len_mask)
return false;
- if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask)
+ if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
return false;

+ bvec++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size);
+
return true;
}

--
Jens Axboe



2024-01-24 12:12:58

by David Laight

[permalink] [raw]
Subject: RE: [PATCH] iov_iter: streamline iovec/bvec alignment iteration

From: Jens Axboe
> Sent: 23 January 2024 19:49
>
> Rewrite iov_iter_aligned_iovec() and iov_iter_aligned_bvec() to be both
> easier to read, and also significantly more compact in terms of
> generated code. This saves 178 bytes of text on x86-64 for me (with
> clang-18) and 136 bytes on arm64 (with gcc-13).
>
...
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index e0aa6b440ca5..2fcc47d822e3 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -714,12 +714,11 @@ EXPORT_SYMBOL(iov_iter_discard);
...
> - for (k = 0; k < i->nr_segs; k++, skip = 0) {
> - const struct iovec *iov = iter_iov(i) + k;
> + do {
> size_t len = iov->iov_len - skip;

Is i->nr_segs allowed to be zero?
The old code (seemed to) check for zero.
The new version will go horribly wrong.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2024-01-24 14:55:52

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH] iov_iter: streamline iovec/bvec alignment iteration

On 1/24/24 5:07 AM, David Laight wrote:
> From: Jens Axboe
>> Sent: 23 January 2024 19:49
>>
>> Rewrite iov_iter_aligned_iovec() and iov_iter_aligned_bvec() to be both
>> easier to read, and also significantly more compact in terms of
>> generated code. This saves 178 bytes of text on x86-64 for me (with
>> clang-18) and 136 bytes on arm64 (with gcc-13).
>>
> ...
>> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
>> index e0aa6b440ca5..2fcc47d822e3 100644
>> --- a/lib/iov_iter.c
>> +++ b/lib/iov_iter.c
>> @@ -714,12 +714,11 @@ EXPORT_SYMBOL(iov_iter_discard);
> ...
>> - for (k = 0; k < i->nr_segs; k++, skip = 0) {
>> - const struct iovec *iov = iter_iov(i) + k;
>> + do {
>> size_t len = iov->iov_len - skip;
>
> Is i->nr_segs allowed to be zero?

Not if size is not zero.

> The old code (seemed to) check for zero.
> The new version will go horribly wrong.

In other spots we do check both segments and size, but not that
nr_segs is non-zero to begin with. So I think this revised version
may be better. Saves 176 bytes of text on arm64 for me, and 126 bytes on
x86-64. I'll send out a v3.


diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e0aa6b440ca5..5e34639c5d1e 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -714,12 +714,12 @@ EXPORT_SYMBOL(iov_iter_discard);
static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
+ const struct iovec *iov = iter_iov(i);
size_t size = i->count;
size_t skip = i->iov_offset;
- unsigned k;
+ unsigned long nr_segs = i->nr_segs;

- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- const struct iovec *iov = iter_iov(i) + k;
+ do {
size_t len = iov->iov_len - skip;

if (len > size)
@@ -729,34 +729,37 @@ static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
if ((unsigned long)(iov->iov_base + skip) & addr_mask)
return false;

+ iov++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size && --nr_segs);
+
return true;
}

static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
- size_t size = i->count;
+ const struct bio_vec *bvec = i->bvec;
unsigned skip = i->iov_offset;
- unsigned k;
+ size_t size = i->count;
+ unsigned long nr_segs = i->nr_segs;

- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- size_t len = i->bvec[k].bv_len - skip;
+ do {
+ size_t len = bvec->bv_len;

if (len > size)
len = size;
if (len & len_mask)
return false;
- if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask)
+ if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
return false;

+ bvec++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size && --nr_segs);
+
return true;
}

@@ -800,13 +803,13 @@ EXPORT_SYMBOL_GPL(iov_iter_is_aligned);

static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
{
+ const struct iovec *iov = iter_iov(i);
unsigned long res = 0;
size_t size = i->count;
size_t skip = i->iov_offset;
- unsigned k;
+ unsigned long nr_segs = i->nr_segs;

- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- const struct iovec *iov = iter_iov(i) + k;
+ do {
size_t len = iov->iov_len - skip;
if (len) {
res |= (unsigned long)iov->iov_base + skip;
@@ -814,30 +817,33 @@ static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
len = size;
res |= len;
size -= len;
- if (!size)
- break;
}
- }
+ iov++;
+ skip = 0;
+ } while (size && --nr_segs);
+
return res;
}

static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
{
+ const struct bio_vec *bvec = i->bvec;
unsigned res = 0;
size_t size = i->count;
unsigned skip = i->iov_offset;
- unsigned k;
+ unsigned long nr_segs = i->nr_segs;

- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- size_t len = i->bvec[k].bv_len - skip;
- res |= (unsigned long)i->bvec[k].bv_offset + skip;
+ do {
+ size_t len = bvec->bv_len - skip;
+ res |= (unsigned long)bvec->bv_offset + skip;
if (len > size)
len = size;
res |= len;
+ bvec++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size && --nr_segs);
+
return res;
}


--
Jens Axboe