With linux-next-20160317, KASAN splats for both, mpi_write_sgl()
and mpi_read_buffer().
While the first isn't dramatic on x86 (as long as it doesn't fault),
the latter potentially overwrites unrelated memory.
These two issues are fixed by
[4/8] ("lib/mpi: mpi_write_sgl(): fix out-of-bounds stack access")
and
[8/8] ("lib/mpi: mpi_read_buffer(): fix buffer overflow")
While reviewing the code, I found another bug, c.f.
[1/8] ("lib/mpi: mpi_write_sgl(): fix skipping of leading zero limbs")
The rest are cleanup/style patches I couldn't hold back ;)
There is still room for improvement in that the leading zero handling
could get easily moved out of the loop, but that's not the business of
this series.
Specifically. this patchset fixes
2d4d1eea540b ("lib/mpi: Add mpi sgl helpers")
and
9cbe21d8f89d ("lib/mpi: only require buffers as big as needed for
the integer")
This series is applicable to linux-next-20160317.
Nicolai Stange (8):
lib/mpi: mpi_write_sgl(): fix skipping of leading zero limbs
lib/mpi: mpi_write_sgl(): fix style issue with lzero decrement
lib/mpi: mpi_write_sgl(): purge redundant pointer arithmetic
lib/mpi: mpi_write_sgl(): fix out-of-bounds stack access
lib/mpi: mpi_write_sgl(): replace open coded endian conversion
lib/mpi: mpi_read_buffer(): optimize skipping of leading zero limbs
lib/mpi: mpi_read_buffer(): replace open coded endian conversion
lib/mpi: mpi_read_buffer(): fix buffer overflow
lib/mpi/mpicoder.c | 91 ++++++++++++++++++++----------------------------------
1 file changed, 33 insertions(+), 58 deletions(-)
--
2.7.2
Currently, if the number of leading zeros is greater than fits into a
complete limb, mpi_write_sgl() skips them by iterating over them limb-wise.
However, it fails to adjust its internal leading zeros tracking variable,
lzeros, accordingly: it does a
p -= sizeof(alimb);
continue;
which should really have been a
lzeros -= sizeof(alimb);
continue;
Since lzeros never decreases if its initial value >= sizeof(alimb), nothing
gets copied by mpi_write_sgl() in that case.
Instead of skipping the high order zero limbs within the loop as shown
above, fix the issue by adjusting the copying loop's bounds.
Fixes: 2d4d1eea540b ("lib/mpi: Add mpi sgl helpers")
Signed-off-by: Nicolai Stange <[email protected]>
---
lib/mpi/mpicoder.c | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index eb15e7d..6bb52be 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -380,7 +380,9 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
buf_len = sgl->length;
p2 = sg_virt(sgl);
- for (i = a->nlimbs - 1; i >= 0; i--) {
+ for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
+ lzeros %= BYTES_PER_MPI_LIMB;
+ i >= 0; i--) {
alimb = a->d[i];
p = (u8 *)&alimb2;
#if BYTES_PER_MPI_LIMB == 4
@@ -401,17 +403,12 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
#error please implement for this limb size.
#endif
if (lzeros > 0) {
- if (lzeros >= sizeof(alimb)) {
- p -= sizeof(alimb);
- continue;
- } else {
- mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
- mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
- + lzeros;
- *limb1 = *limb2;
- p -= lzeros;
- y = lzeros;
- }
+ mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
+ mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
+ + lzeros;
+ *limb1 = *limb2;
+ p -= lzeros;
+ y = lzeros;
lzeros -= sizeof(alimb);
}
--
2.7.2
Within the copying loop in mpi_write_sgl(), we have
if (lzeros) {
...
p -= lzeros;
y = lzeros;
}
p = p - (sizeof(alimb) - y);
If lzeros == 0, then y == 0, too. Thus, lzeros gets subtracted and added
back again to p.
Purge this redundancy.
Signed-off-by: Nicolai Stange <[email protected]>
---
lib/mpi/mpicoder.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index d8b372b..78ec4e1 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -407,12 +407,11 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
+ lzeros;
*limb1 = *limb2;
- p -= lzeros;
y = lzeros;
lzeros = 0;
}
- p = p - (sizeof(alimb) - y);
+ p = p - sizeof(alimb);
for (x = 0; x < sizeof(alimb) - y; x++) {
if (!buf_len) {
--
2.7.2
Currently, mpi_read_buffer() writes full limbs to the output buffer
and moves memory around to purge leading zero limbs afterwards.
However, with
commit 9cbe21d8f89d ("lib/mpi: only require buffers as big as needed for
the integer")
the caller is only required to provide a buffer large enough to hold the
result without the leading zeros.
This might result in a buffer overflow for small MP numbers with leading
zeros.
Fix this by coping the result to its final destination within the output
buffer and not copying the leading zeros at all.
Fixes: 9cbe21d8f89d ("lib/mpi: only require buffers as big as needed for
the integer")
Signed-off-by: Nicolai Stange <[email protected]>
---
lib/mpi/mpicoder.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index a999ee1..27582e2 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -201,16 +201,9 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
#else
#error please implement for this limb size.
#endif
- memcpy(p, &alimb, BYTES_PER_MPI_LIMB);
- p += BYTES_PER_MPI_LIMB;
- if (lzeros > 0) {
- mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
- mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
- + lzeros;
- *limb1 = *limb2;
- p -= lzeros;
- lzeros -= sizeof(alimb);
- }
+ memcpy(p, &alimb + lzeros, BYTES_PER_MPI_LIMB - lzeros);
+ p += BYTES_PER_MPI_LIMB - lzeros;
+ lzeros = 0;
}
return 0;
}
--
2.7.2
Currently, the endian conversion from CPU order to BE is open coded in
mpi_read_buffer().
Replace this by the centrally provided cpu_to_be*() macros.
Copy from the temporary storage on stack to the destination buffer
by means of memcpy().
Signed-off-by: Nicolai Stange <[email protected]>
---
lib/mpi/mpicoder.c | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 2fd8d41..a999ee1 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -21,6 +21,7 @@
#include <linux/bitops.h>
#include <linux/count_zeros.h>
#include <linux/byteorder/generic.h>
+#include <linux/string.h>
#include "mpi-internal.h"
#define MAX_EXTERN_MPI_BITS 16384
@@ -164,7 +165,13 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
int *sign)
{
uint8_t *p;
- mpi_limb_t alimb;
+#if BYTES_PER_MPI_LIMB == 4
+ __be32 alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+ __be64 alimb;
+#else
+#error please implement for this limb size.
+#endif
unsigned int n = mpi_get_size(a);
int i, lzeros;
@@ -187,25 +194,15 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
lzeros %= BYTES_PER_MPI_LIMB;
i >= 0; i--) {
- alimb = a->d[i];
#if BYTES_PER_MPI_LIMB == 4
- *p++ = alimb >> 24;
- *p++ = alimb >> 16;
- *p++ = alimb >> 8;
- *p++ = alimb;
+ alimb = cpu_to_be32(a->d[i]);
#elif BYTES_PER_MPI_LIMB == 8
- *p++ = alimb >> 56;
- *p++ = alimb >> 48;
- *p++ = alimb >> 40;
- *p++ = alimb >> 32;
- *p++ = alimb >> 24;
- *p++ = alimb >> 16;
- *p++ = alimb >> 8;
- *p++ = alimb;
+ alimb = cpu_to_be64(a->d[i]);
#else
#error please implement for this limb size.
#endif
-
+ memcpy(p, &alimb, BYTES_PER_MPI_LIMB);
+ p += BYTES_PER_MPI_LIMB;
if (lzeros > 0) {
mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
--
2.7.2
Currently, if the number of leading zeros is greater than fits into a
complete limb, mpi_read_buffer() skips them by iterating over them
limb-wise.
Instead of skipping the high order zero limbs within the loop as shown
above, adjust the copying loop's bounds.
Signed-off-by: Nicolai Stange <[email protected]>
---
lib/mpi/mpicoder.c | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 623439e..2fd8d41 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -184,7 +184,9 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
p = buf;
*nbytes = n - lzeros;
- for (i = a->nlimbs - 1; i >= 0; i--) {
+ for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
+ lzeros %= BYTES_PER_MPI_LIMB;
+ i >= 0; i--) {
alimb = a->d[i];
#if BYTES_PER_MPI_LIMB == 4
*p++ = alimb >> 24;
@@ -205,15 +207,11 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
#endif
if (lzeros > 0) {
- if (lzeros >= sizeof(alimb)) {
- p -= sizeof(alimb);
- } else {
- mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
- mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
- + lzeros;
- *limb1 = *limb2;
- p -= lzeros;
- }
+ mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
+ mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
+ + lzeros;
+ *limb1 = *limb2;
+ p -= lzeros;
lzeros -= sizeof(alimb);
}
}
--
2.7.2
Within the copying loop in mpi_write_sgl(), we have
if (lzeros) {
mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
+ lzeros;
*limb1 = *limb2;
...
}
where p points past the end of alimb2 which lives on the stack and contains
the current limb in BE order.
The purpose of the above is to shift the non-zero bytes of alimb2 to its
beginning in memory, i.e. to skip its leading zero bytes.
However, limb2 points somewhere into the middle of alimb2 and thus, reading
*limb2 pulls in lzero bytes from somewhere.
Indeed, KASAN splats:
BUG: KASAN: stack-out-of-bounds in mpi_write_to_sgl+0x4e3/0x6f0
at addr ffff8800cb04f601
Read of size 8 by task systemd-udevd/391
page:ffffea00032c13c0 count:0 mapcount:0 mapping: (null) index:0x0
flags: 0x3fff8000000000()
page dumped because: kasan: bad access detected
CPU: 3 PID: 391 Comm: systemd-udevd Tainted: G B L
4.5.0-next-20160316+ #12
[...]
Call Trace:
[<ffffffff8194889e>] dump_stack+0xdc/0x15e
[<ffffffff819487c2>] ? _atomic_dec_and_lock+0xa2/0xa2
[<ffffffff814892b5>] ? __dump_page+0x185/0x330
[<ffffffff8150ffd6>] kasan_report_error+0x5e6/0x8b0
[<ffffffff814724cd>] ? kzfree+0x2d/0x40
[<ffffffff819c5bce>] ? mpi_free_limb_space+0xe/0x20
[<ffffffff819c469e>] ? mpi_powm+0x37e/0x16f0
[<ffffffff815109f1>] kasan_report+0x71/0xa0
[<ffffffff819c0353>] ? mpi_write_to_sgl+0x4e3/0x6f0
[<ffffffff8150ed34>] __asan_load8+0x64/0x70
[<ffffffff819c0353>] mpi_write_to_sgl+0x4e3/0x6f0
[<ffffffff819bfe70>] ? mpi_set_buffer+0x620/0x620
[<ffffffff819c0e6f>] ? mpi_cmp+0xbf/0x180
[<ffffffff8186e282>] rsa_verify+0x202/0x260
What's more, since lzeros can be anything from 1 to sizeof(mpi_limb_t)-1,
the above will cause unaligned accesses which is bad on non-x86 archs.
Fix the issue, by preparing the starting point p for the upcoming copy
operation instead of shifting the source memory, i.e. alimb2.
Fixes: 2d4d1eea540b ("lib/mpi: Add mpi sgl helpers")
Signed-off-by: Nicolai Stange <[email protected]>
---
lib/mpi/mpicoder.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 78ec4e1..b05d390 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -403,15 +403,11 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
#error please implement for this limb size.
#endif
if (lzeros) {
- mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
- mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
- + lzeros;
- *limb1 = *limb2;
y = lzeros;
lzeros = 0;
}
- p = p - sizeof(alimb);
+ p = p - sizeof(alimb) + y;
for (x = 0; x < sizeof(alimb) - y; x++) {
if (!buf_len) {
--
2.7.2
Currently, the endian conversion from CPU order to BE is open coded in
mpi_write_sgl().
Replace this by the centrally provided cpu_to_be*() macros.
Signed-off-by: Nicolai Stange <[email protected]>
---
lib/mpi/mpicoder.c | 27 +++++++++++----------------
1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index b05d390..623439e 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -20,6 +20,7 @@
#include <linux/bitops.h>
#include <linux/count_zeros.h>
+#include <linux/byteorder/generic.h>
#include "mpi-internal.h"
#define MAX_EXTERN_MPI_BITS 16384
@@ -359,7 +360,13 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
int *sign)
{
u8 *p, *p2;
- mpi_limb_t alimb, alimb2;
+#if BYTES_PER_MPI_LIMB == 4
+ __be32 alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+ __be64 alimb;
+#else
+#error please implement for this limb size.
+#endif
unsigned int n = mpi_get_size(a);
int i, x, y = 0, lzeros, buf_len;
@@ -383,22 +390,10 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
lzeros %= BYTES_PER_MPI_LIMB;
i >= 0; i--) {
- alimb = a->d[i];
- p = (u8 *)&alimb2;
#if BYTES_PER_MPI_LIMB == 4
- *p++ = alimb >> 24;
- *p++ = alimb >> 16;
- *p++ = alimb >> 8;
- *p++ = alimb;
+ alimb = cpu_to_be32(a->d[i]);
#elif BYTES_PER_MPI_LIMB == 8
- *p++ = alimb >> 56;
- *p++ = alimb >> 48;
- *p++ = alimb >> 40;
- *p++ = alimb >> 32;
- *p++ = alimb >> 24;
- *p++ = alimb >> 16;
- *p++ = alimb >> 8;
- *p++ = alimb;
+ alimb = cpu_to_be64(a->d[i]);
#else
#error please implement for this limb size.
#endif
@@ -407,7 +402,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
lzeros = 0;
}
- p = p - sizeof(alimb) + y;
+ p = (u8 *)&alimb + y;
for (x = 0; x < sizeof(alimb) - y; x++) {
if (!buf_len) {
--
2.7.2
Within the copying loop in mpi_write_sgl(), we have
if (lzeros > 0) {
...
lzeros -= sizeof(alimb);
}
However, at this point, lzeros < sizeof(alimb) holds. Make this fact
explicit by rewriting the above to
if (lzeros) {
...
lzeros = 0;
}
Signed-off-by: Nicolai Stange <[email protected]>
---
lib/mpi/mpicoder.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 6bb52be..d8b372b 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -402,14 +402,14 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
#else
#error please implement for this limb size.
#endif
- if (lzeros > 0) {
+ if (lzeros) {
mpi_limb_t *limb1 = (void *)p - sizeof(alimb);
mpi_limb_t *limb2 = (void *)p - sizeof(alimb)
+ lzeros;
*limb1 = *limb2;
p -= lzeros;
y = lzeros;
- lzeros -= sizeof(alimb);
+ lzeros = 0;
}
p = p - (sizeof(alimb) - y);
--
2.7.2
Nicolai Stange <[email protected]> writes:
I've just sent a supplemented v2:
http://lkml.kernel.org/g/[email protected]
So please drop this v1.
Thanks,
Nicolai