Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1036037AbdDUHZg convert rfc822-to-8bit (ORCPT ); Fri, 21 Apr 2017 03:25:36 -0400 Received: from mx2.suse.de ([195.135.220.15]:54183 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1035995AbdDUHZd (ORCPT ); Fri, 21 Apr 2017 03:25:33 -0400 Content-Type: text/plain; charset=us-ascii Mime-Version: 1.0 (1.0) Subject: Re: [PATCH] ceph: Fix file open flags on ppc64 From: Alexander Graf X-Mailer: iPhone Mail (14E304) In-Reply-To: <20170421065939.hunjgekn4knrfzx4@jf_suse_laptop> Date: Fri, 21 Apr 2017 09:25:26 +0200 Cc: "Yan, Zheng" , ceph-devel , Sage Weil , Ilya Dryomov , Linux Kernel Mailing List , Jan.Fajerski@suse.com Content-Transfer-Encoding: 8BIT Message-Id: <33BF9A5A-2B46-48B7-914C-10197BACA0E0@suse.de> References: <1492692030-146869-1-git-send-email-agraf@suse.de> <20170421065939.hunjgekn4knrfzx4@jf_suse_laptop> To: Jan Fajerski Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4790 Lines: 138 > Am 21.04.2017 um 08:59 schrieb Jan Fajerski : > >> On Fri, Apr 21, 2017 at 10:22:16AM +0800, Yan, Zheng wrote: >> >>> On 20 Apr 2017, at 20:40, Alexander Graf wrote: >>> >>> The file open flags (O_foo) are platform specific and should never go >>> out to an interface that is not local to the system. >>> >>> Unfortunately these flags have leaked out onto the wire in the cephfs >>> implementation. That lead to bogus flags getting transmitted on ppc64. >>> >>> This patch converts the kernel view of flags to the ceph view of file >>> open flags. On x86 systems, the new function should get optimized out >>> by smart compilers. On systems where the flags differ, it should adapt >>> them accordingly. >>> >>> Fixes: 124e68e74 ("ceph: file operations") >>> Signed-off-by: Alexander Graf >>> --- >>> fs/ceph/file.c | 45 +++++++++++++++++++++++++++++++++++++++++++- >>> include/linux/ceph/ceph_fs.h | 24 +++++++++++++++++++++++ >>> 2 files changed, 68 insertions(+), 1 deletion(-) >>> >>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c >>> index 26cc954..0ed6392 100644 >>> --- a/fs/ceph/file.c >>> +++ b/fs/ceph/file.c >>> @@ -103,6 +103,49 @@ static size_t dio_get_pagev_size(const struct iov_iter *it) >>> return ERR_PTR(ret); >>> } >>> >>> +static u32 ceph_flags_sys2wire(u32 flags) >>> +{ >>> + u32 wire_flags = 0; >>> + >>> + switch (flags & O_ACCMODE) { >>> + case O_RDONLY: >>> + wire_flags |= CEPH_O_RDONLY; >>> + break; >>> + case O_WRONLY: >>> + wire_flags |= CEPH_O_WRONLY; >>> + break; >>> + case O_RDWR: >>> + wire_flags |= CEPH_O_RDWR; >>> + break; >>> + } >>> + flags &= ~O_ACCMODE; >>> + >>> +#define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; } >>> + >>> + ceph_sys2wire(O_CREAT); >>> + ceph_sys2wire(O_NOCTTY); >>> + ceph_sys2wire(O_TRUNC); >>> + ceph_sys2wire(O_APPEND); >>> + ceph_sys2wire(O_NONBLOCK); >>> + ceph_sys2wire(O_DSYNC); >>> + ceph_sys2wire(FASYNC); >>> + ceph_sys2wire(O_DIRECT); >>> + ceph_sys2wire(O_LARGEFILE); >>> + ceph_sys2wire(O_DIRECTORY); >>> + ceph_sys2wire(O_NOFOLLOW); >>> + ceph_sys2wire(O_NOATIME); >>> + ceph_sys2wire(O_CLOEXEC); >>> + ceph_sys2wire(__O_SYNC); >>> + ceph_sys2wire(O_PATH); >>> + ceph_sys2wire(__O_TMPFILE); >>> + >>> +#undef ceph_sys2wire >>> + >>> + WARN_ONCE(flags, "Found unknown open flags: %x", flags); >>> + >>> + return wire_flags; >>> +} >>> + >>> /* >>> * Prepare an open request. Preallocate ceph_cap to avoid an >>> * inopportune ENOMEM later. >>> @@ -123,7 +166,7 @@ static size_t dio_get_pagev_size(const struct iov_iter *it) >>> if (IS_ERR(req)) >>> goto out; >>> req->r_fmode = ceph_flags_to_mode(flags); >>> - req->r_args.open.flags = cpu_to_le32(flags); >>> + req->r_args.open.flags = ceph_flags_sys2wire(cpu_to_le32(flags)); >> >> cpu_to_le32(ceph_flags_sys2wire(flags)) ? >> >>> req->r_args.open.mode = cpu_to_le32(create_mode); >>> out: >>> return req; >>> diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h >>> index f4b2ee1..d254b48 100644 >>> --- a/include/linux/ceph/ceph_fs.h >>> +++ b/include/linux/ceph/ceph_fs.h >>> @@ -366,6 +366,30 @@ enum { >>> #define CEPH_READDIR_FRAG_COMPLETE (1<<8) >>> #define CEPH_READDIR_HASH_ORDER (1<<9) >>> >>> +/* >>> + * open request flags >>> + */ >>> +#define CEPH_O_RDONLY 00000000 >>> +#define CEPH_O_WRONLY 00000001 >>> +#define CEPH_O_RDWR 00000002 >>> +#define CEPH_O_CREAT 00000100 >>> +#define CEPH_O_EXCL 00000200 >>> +#define CEPH_O_NOCTTY 00000400 >>> +#define CEPH_O_TRUNC 00001000 >>> +#define CEPH_O_APPEND 00002000 >>> +#define CEPH_O_NONBLOCK 00004000 >>> +#define CEPH_O_DSYNC 00010000 >>> +#define CEPH_FASYNC 00020000 >>> +#define CEPH_O_DIRECT 00040000 >>> +#define CEPH_O_LARGEFILE 00100000 >>> +#define CEPH_O_DIRECTORY 00200000 >>> +#define CEPH_O_NOFOLLOW 00400000 >>> +#define CEPH_O_NOATIME 01000000 >>> +#define CEPH_O_CLOEXEC 02000000 >>> +#define CEPH___O_SYNC 04000000 >>> +#define CEPH_O_PATH 010000000 >>> +#define CEPH___O_TMPFILE 020000000 >>> + >> >> RDONLY, WRONLY, RDWR, CREAT, EXCL, TRUNC, DIRECTORY, NOFOLLOW are used by mds, no need to define the rest. >> >>> union ceph_mds_request_args { >>> struct { >>> __le32 mask; /* CEPH_CAP_* */ >> >> Besides, the ceph mds server requires the fix too. Do you have patch for that? > If the wire format is normalized like this, the mds can remain unchanged, can it not? I think he means the case when mds is running on a ppc64 host. Alex