When an initrd (compressed or not) is used, the kernel reports data
corruption on /dev/ram0.
The root cause:
During the initramfs check, if the image is an initrd, it is copied to
/initrd.image with sys_write.
A single sys_write call writes at most 2G-4K bytes, so if the initrd is
larger than that, /initrd.image will be incomplete.
Add a local xwrite() that calls sys_write in a loop to work around the
problem.
xwrite also needs to be used in the write_buffer() path to handle the case
where the image is an uncompressed cpio with one big file (>2G) in it:
unpack_to_rootfs ===> write_buffer ===> actions[]/do_copy
At the same time, we don't need to worry about sys_read/sys_write in
do_mounts_rd.c::crd_load, as the decompressor uses fill/flush with a
local buffer that is smaller than 2G.
Tested with an uncompressed initrd, and compressed ones with gz, bz2, lzma, xz,
lzop.
-v2: according to HPA, change name to xwrite.
Signed-off-by: Yinghai Lu <[email protected]>
Acked-by: H. Peter Anvin <[email protected]>
---
init/initramfs.c | 33 +++++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
Index: linux-2.6/init/initramfs.c
===================================================================
--- linux-2.6.orig/init/initramfs.c
+++ linux-2.6/init/initramfs.c
@@ -19,6 +19,26 @@
#include <linux/syscalls.h>
#include <linux/utime.h>
+static long __init xwrite(unsigned int fd, char *p,
+ size_t count)
+{
+ ssize_t left = count;
+ long written;
+
+ /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
+ while (left > 0) {
+ written = sys_write(fd, p, left);
+
+ if (written <= 0)
+ break;
+
+ left -= written;
+ p += written;
+ }
+
+ return (written < 0) ? written : count;
+}
+
static __initdata char *message;
static void __init error(char *x)
{
@@ -346,7 +366,7 @@ static int __init do_name(void)
static int __init do_copy(void)
{
if (count >= body_len) {
- sys_write(wfd, victim, body_len);
+ xwrite(wfd, victim, body_len);
sys_close(wfd);
do_utime(vcollected, mtime);
kfree(vcollected);
@@ -354,7 +374,7 @@ static int __init do_copy(void)
state = SkipIt;
return 0;
} else {
- sys_write(wfd, victim, count);
+ xwrite(wfd, victim, count);
body_len -= count;
eat(count);
return 1;
@@ -604,8 +624,13 @@ static int __init populate_rootfs(void)
fd = sys_open("/initrd.image",
O_WRONLY|O_CREAT, 0700);
if (fd >= 0) {
- sys_write(fd, (char *)initrd_start,
- initrd_end - initrd_start);
+ long written = xwrite(fd, (char *)initrd_start,
+ initrd_end - initrd_start);
+
+ if (written != initrd_end - initrd_start)
+ pr_err("/initrd.image: incomplete write (%ld != %ld)\n",
+ written, initrd_end - initrd_start);
+
sys_close(fd);
free_initrd();
}
On 06/20/2014 07:29 PM, Yinghai Lu wrote:
> When initrd (compressed or not) is used, kernel report data corrupted
> with /dev/ram0.
>
> The root cause:
> During initramfs checking, if it is initrd, it will be transferred to
> /initrd.image with sys_write.
> sys_write only support 2G-4K write, so if the initrd ram is more than
> that, /initrd.image will not complete at all.
>
> Add local xwrite to loop calling sys_write to workaround the
> problem.
>
> Also need to use xwrite in write_buffer() to handle:
> image is uncompressed cpio and there is one big file (>2G) in it.
> unpack_to_rootfs ===> write_buffer ===> actions[]/do_copy
>
> At the same time, we don't need to worry about sys_read/sys_write in
> do_mounts_rd.c::crd_load. As decompressor will have fill/flush and
> local buffer that is smaller than 2G.
>
> Test with uncompressed initrd, and compressed ones with gz, bz2, lzma,xz,
> lzop.
>
> -v2: according to HPA, change name to xwrite.
>
> Signed-off-by: Yinghai Lu <[email protected]>
> Acked-by: H. Peter Anvin <[email protected]>
>
> ---
> init/initramfs.c | 33 +++++++++++++++++++++++++++++----
> 1 file changed, 29 insertions(+), 4 deletions(-)
>
> Index: linux-2.6/init/initramfs.c
> ===================================================================
> --- linux-2.6.orig/init/initramfs.c
> +++ linux-2.6/init/initramfs.c
> @@ -19,6 +19,26 @@
> #include <linux/syscalls.h>
> #include <linux/utime.h>
>
> +static long __init xwrite(unsigned int fd, char *p,
> + size_t count)
> +{
> + ssize_t left = count;
> + long written;
> +
> + /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
> + while (left > 0) {
> + written = sys_write(fd, p, left);
> +
> + if (written <= 0)
> + break;
> +
> + left -= written;
> + p += written;
> + }
> +
> + return (written < 0) ? written : count;
The return value is bogus here. It is probably only a theoretical problem
(since you only care about a full write), but a written value of 0 would
result in count being returned no matter how many bytes were actually written.
The normal behavior of xwrite(), like fwrite() and write(), is to return
the total number of bytes written if any bytes are written at all.
Here is my personal implementation of xwrite() (designed for userspace):
http://git.zytor.com/?p=lib/lib.git;a=blob;f=xwrite.c;hb=HEAD
Otherwise, the patch looks good.
-hpa
On Sat, Jun 21, 2014 at 4:29 AM, Yinghai Lu <[email protected]> wrote:
> --- linux-2.6.orig/init/initramfs.c
> +++ linux-2.6/init/initramfs.c
> @@ -19,6 +19,26 @@
> #include <linux/syscalls.h>
> #include <linux/utime.h>
>
> +static long __init xwrite(unsigned int fd, char *p,
> + size_t count)
Shouldn't this return ssize_t instead of long?
> +{
> + ssize_t left = count;
> + long written;
ssize_t written
> +
> + /* sys_write only can write MAX_RW_COUNT aka 2G-4K bytes at most */
> + while (left > 0) {
> + written = sys_write(fd, p, left);
> +
> + if (written <= 0)
> + break;
> +
> + left -= written;
> + p += written;
> + }
> +
> + return (written < 0) ? written : count;
> +}
> +
> static __initdata char *message;
> static void __init error(char *x)
> {
> @@ -604,8 +624,13 @@ static int __init populate_rootfs(void)
> fd = sys_open("/initrd.image",
> O_WRONLY|O_CREAT, 0700);
> if (fd >= 0) {
> - sys_write(fd, (char *)initrd_start,
> - initrd_end - initrd_start);
> + long written = xwrite(fd, (char *)initrd_start,
ssize_t written
> + initrd_end - initrd_start);
> +
> + if (written != initrd_end - initrd_start)
> + pr_err("/initrd.image: incomplete write (%ld != %ld)\n",
"%zd", once written is ssize_t.
> + written, initrd_end - initrd_start);
> +
> sys_close(fd);
> free_initrd();
> }
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds