Hi,
After having tried to thrash out what exactly is the kernel
interface for telldir/seekdir w.r.t. the existence of negative offsets
with the glibc people, I've finally found a way to work within the
current scheme.
The problem at hand is that we currently would like to support the
existence of directory cookies that take unsigned 32-bit values in
NFSv2, unsigned 64-bit values in NFSv3.
Given that most NFSv3 servers can/do use 32-bit cookies for
compatibility with 32-bit systems, we would like to be able to pass
this type of cookie back up to userland for use by the 32-bit
interface.
However the interface chosen both in glibc and partly in the kernel
itself assumes all cookies are 32-bit signed values. Thus you have to
find a way to cope with the kernel and glibc sign extending (almost)
all cookies which have bit 31 set.
The following patch therefore does 3 things:
- Patch linux/fs/readdir.c so that file->f_pos is copied into the
dirent64 structure with sign extension. This is for consistency
with the behaviour of filldir64.
- Patch NFSv2 xdr routines so that 32-bit cookies get extended to
take 64-bit signed values (as opposed to unsigned values) for
consistency with the fact that (l|)off_t are both signed.
- Patch NFSv3 xdr routines with a hack that mimics sign extension
on those cookies which are truly 32-bit unsigned.
To do this we use the transformation
(cookie & 0x80000000) ? cookie ^ 0xFFFFFFFF00000000 : cookie;
Note that this a transformation has no effect on true 64-bit
cookies because it is reversible.
- Make a special version of 'lseek()' for NFS directories that
returns 0 if the offset used was negative (rather than returning
the offset itself). This avoids userland misinterpreting the
return value as an error.
The above fixes should ensure that all cookies taking values between 0
and (2^32-1) on the NFS server are preserved through the 32-bit VFS
interface, and are accepted by glibc as valid entries. It should also
work fine with existing 64-bit architectures.
Please could people give this a try, and report if it fixes the
problems.
Cheers,
Trond
diff -u --recursive --new-file linux-2.4.2-fh_align/fs/nfs/dir.c linux-2.4.2-dir/fs/nfs/dir.c
--- linux-2.4.2-fh_align/fs/nfs/dir.c Fri Feb 9 20:29:44 2001
+++ linux-2.4.2-dir/fs/nfs/dir.c Thu Feb 22 12:34:41 2001
@@ -34,6 +34,7 @@
#define NFS_PARANOIA 1
/* #define NFS_DEBUG_VERBOSE 1 */
+static loff_t nfs_dir_llseek(struct file *, loff_t, int);
static int nfs_readdir(struct file *, void *, filldir_t);
static struct dentry *nfs_lookup(struct inode *, struct dentry *);
static int nfs_create(struct inode *, struct dentry *, int);
@@ -47,6 +48,7 @@
struct inode *, struct dentry *);
struct file_operations nfs_dir_operations = {
+ llseek: nfs_dir_llseek,
read: generic_read_dir,
readdir: nfs_readdir,
open: nfs_open,
@@ -67,6 +69,25 @@
revalidate: nfs_revalidate,
setattr: nfs_notify_change,
};
+
+static loff_t nfs_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+ switch (origin) {
+ case 1:
+ if (offset == 0) {
+ offset = file->f_pos;
+ break;
+ }
+ case 2:
+ return -EINVAL;
+ }
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_reada = 0;
+ file->f_version = ++event;
+ }
+ return (offset <= 0) ? 0 : offset;
+}
typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int);
typedef struct {
diff -u --recursive --new-file linux-2.4.2-fh_align/fs/nfs/nfs2xdr.c linux-2.4.2-dir/fs/nfs/nfs2xdr.c
--- linux-2.4.2-fh_align/fs/nfs/nfs2xdr.c Fri Feb 9 20:29:44 2001
+++ linux-2.4.2-dir/fs/nfs/nfs2xdr.c Thu Feb 22 10:47:49 2001
@@ -419,7 +419,7 @@
bufsiz = bufsiz >> 2;
p = xdr_encode_fhandle(p, args->fh);
- *p++ = htonl(args->cookie);
+ *p++ = htonl(args->cookie & 0xFFFFFFFF);
*p++ = htonl(bufsiz); /* see above */
req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
@@ -504,7 +504,7 @@
entry->name = (const char *) p;
p += XDR_QUADLEN(entry->len);
entry->prev_cookie = entry->cookie;
- entry->cookie = ntohl(*p++);
+ entry->cookie = (s64)((off_t)ntohl(*p++));
entry->eof = !p[0] && p[1];
return p;
diff -u --recursive --new-file linux-2.4.2-fh_align/fs/nfs/nfs3xdr.c linux-2.4.2-dir/fs/nfs/nfs3xdr.c
--- linux-2.4.2-fh_align/fs/nfs/nfs3xdr.c Fri Feb 9 20:29:44 2001
+++ linux-2.4.2-dir/fs/nfs/nfs3xdr.c Thu Feb 22 10:47:49 2001
@@ -523,6 +523,13 @@
return 0;
}
+/* Hack to sign-extending 32-bit cookies */
+static inline
+u64 nfs_transform_cookie64(u64 cookie)
+{
+ return (cookie & 0x80000000) ? (cookie ^ 0xFFFFFFFF00000000) : cookie;
+}
+
/*
* Encode arguments to readdir call
*/
@@ -533,7 +540,7 @@
int buflen, replen;
p = xdr_encode_fhandle(p, args->fh);
- p = xdr_encode_hyper(p, args->cookie);
+ p = xdr_encode_hyper(p, nfs_transform_cookie64(args->cookie));
*p++ = args->verf[0];
*p++ = args->verf[1];
if (args->plus) {
@@ -635,6 +642,7 @@
nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
{
struct nfs_entry old = *entry;
+ u64 cookie;
if (!*p++) {
if (!*p)
@@ -648,7 +656,8 @@
entry->name = (const char *) p;
p += XDR_QUADLEN(entry->len);
entry->prev_cookie = entry->cookie;
- p = xdr_decode_hyper(p, &entry->cookie);
+ p = xdr_decode_hyper(p, cookie);
+ entry->cookie = nfs_transform_cookie64(cookie);
if (plus) {
p = xdr_decode_post_op_attr(p, &entry->fattr);
diff -u --recursive --new-file linux-2.4.2-fh_align/fs/readdir.c linux-2.4.2-dir/fs/readdir.c
--- linux-2.4.2-fh_align/fs/readdir.c Mon Dec 11 22:45:42 2000
+++ linux-2.4.2-dir/fs/readdir.c Thu Feb 22 10:47:49 2001
@@ -315,7 +315,8 @@
lastdirent = buf.previous;
if (lastdirent) {
struct linux_dirent64 d;
- d.d_off = file->f_pos;
+ /* get the sign extension right */
+ d.d_off = (off_t)file->f_pos;
copy_to_user(&lastdirent->d_off, &d.d_off, sizeof(d.d_off));
error = count - buf.count;
}
On Thu, Feb 22, 2001 at 03:48:50PM +0100, Trond Myklebust wrote:
>
> Hi,
>
> After having tried to thrash out what exactly is the kernel
> interface for telldir/seekdir w.r.t. the existence of negative offsets
> with the glibc people, I've finally found a way to work within the
> current scheme.
>
> The problem at hand is that we currently would like to support the
> existence of directory cookies that take unsigned 32-bit values in
> NFSv2, unsigned 64-bit values in NFSv3.
>
> Given that most NFSv3 servers can/do use 32-bit cookies for
> compatibility with 32-bit systems, we would like to be able to pass
> this type of cookie back up to userland for use by the 32-bit
> interface.
> However the interface chosen both in glibc and partly in the kernel
> itself assumes all cookies are 32-bit signed values. Thus you have to
> find a way to cope with the kernel and glibc sign extending (almost)
> all cookies which have bit 31 set.
>
> The following patch therefore does 3 things:
>
> - Patch linux/fs/readdir.c so that file->f_pos is copied into the
> dirent64 structure with sign extension. This is for consistency
> with the behaviour of filldir64.
>
> - Patch NFSv2 xdr routines so that 32-bit cookies get extended to
> take 64-bit signed values (as opposed to unsigned values) for
> consistency with the fact that (l|)off_t are both signed.
>
> - Patch NFSv3 xdr routines with a hack that mimics sign extension
> on those cookies which are truly 32-bit unsigned.
> To do this we use the transformation
>
> (cookie & 0x80000000) ? cookie ^ 0xFFFFFFFF00000000 : cookie;
>
> Note that this a transformation has no effect on true 64-bit
> cookies because it is reversible.
>
> - Make a special version of 'lseek()' for NFS directories that
> returns 0 if the offset used was negative (rather than returning
> the offset itself). This avoids userland misinterpreting the
> return value as an error.
>
> The above fixes should ensure that all cookies taking values between 0
> and (2^32-1) on the NFS server are preserved through the 32-bit VFS
> interface, and are accepted by glibc as valid entries. It should also
> work fine with existing 64-bit architectures.
>
> Please could people give this a try, and report if it fixes the
> problems.
>
I don't know how it will work with real 64bit cookies on a 32bit
host for NFS V3 since you truncate it into 32bit during sign
extension.
H.J.
>>>>> " " == H J Lu <[email protected]> writes:
> I don't know how it will work with real 64bit cookies on a
> 32bit host for NFS V3 since you truncate it into 32bit during
> sign extension.
It won't for the moment, but that's a problem with the readdir() API
which uses the 32-bit off_t rather than loff_t for the filldir()
interface. The reason I added an extra truncation for the value of
file->f_pos (which is used to fill the d_off field only for the last
dirent) was for consistency with this interface.
I do have a patch to change the format of filldir too, but it'll
probably have to wait until Linux 2.5.
Cheers,
Trond
On Tue, Feb 27, 2001 at 07:57:36AM +0100, Trond Myklebust wrote:
> >>>>> " " == H J Lu <[email protected]> writes:
>
> > I don't know how it will work with real 64bit cookies on a
> > 32bit host for NFS V3 since you truncate it into 32bit during
> > sign extension.
>
> It won't for the moment, but that's a problem with the readdir() API
> which uses the 32-bit off_t rather than loff_t for the filldir()
I noticed that also.
> interface. The reason I added an extra truncation for the value of
> file->f_pos (which is used to fill the d_off field only for the last
> dirent) was for consistency with this interface.
>
> I do have a patch to change the format of filldir too, but it'll
> probably have to wait until Linux 2.5.
>
I much prefer to have a new getdents system call which will also return
d_type so that the 32 bit function in glibc can use this new getdents
instead of getdents64.
--
H.J. Lu ([email protected])
>>>>> " " == H J Lu <[email protected]> writes:
> I much prefer to have a new getdents system call which will
> also return d_type so that the 32 bit function in glibc can use
> this new getdents instead of getdents64.
That could also be done, however it seems odd to be adding a new
32-bit interface after the point when we're supposed to all have moved
to 64 bits.
My concern in presenting that patch is simply that if it is true that
we actually have a well defined interface for passing 32-bit cookies
via getdents64, and if it is true that everybody agrees on this
interface, then NFS has no choice but to try to conform.
Cheers,
Trond
BTW: does anybody anywhere actually use d_type? Certainly the standard
utilities such as 'find' or 'ls' don't seem to have been adapted
yet: I hacked up a version of NFSv3 that actually filled d_type
(by using readdirplus rather than readdir) but I've yet to find
any 'off the shelf' software, that uses the extra information.
Trond Myklebust wrote:
> >>>>> " " == H J Lu <[email protected]> writes:
>
> > I much prefer to have a new getdents system call which will
> > also return d_type so that the 32 bit function in glibc can use
> > this new getdents instead of getdents64.
>
> That could also be done, however it seems odd to be adding a new
> 32-bit interface after the point when we're supposed to all have moved
> to 64 bits.
Indeed, getdents64 -> 32 conversion is hardly performance critical
anyway.
> BTW: does anybody anywhere actually use d_type? Certainly the standard
> utilities such as 'find' or 'ls' don't seem to have been adapted
> yet: I hacked up a version of NFSv3 that actually filled d_type
> (by using readdirplus rather than readdir) but I've yet to find
> any 'off the shelf' software, that uses the extra information.
Look for `treescan' via Google. It uses a number of heuristics to
optimise a recursive directory search, and one of those is d_type if
available. Though it dates back to a time before getdents64 (I hacked
getdents to return d_type).
d_type is quite effective for `find' type scans. I really ought to
release an updated treescan -- the intent was always to replace the
backend of `find' but I got caught up trying to optimise the order of
open/readdir/close sequences and then on to other things.
-- Jamie
On Thu, Feb 22, 2001 at 03:48:50PM +0100, Trond Myklebust wrote:
>
> The above fixes should ensure that all cookies taking values between 0
> and (2^32-1) on the NFS server are preserved through the 32-bit VFS
> interface, and are accepted by glibc as valid entries. It should also
> work fine with existing 64-bit architectures.
>
> Please could people give this a try, and report if it fixes the
> problems.
Have you tried it against a Linux NFS V3 server? When I log in
with my home directory mounted from a Linux NFS V3 server, I
got kernel oops when I do
# cat /proc/mounts
I think the problem may be cookie transform thing.
> --- linux-2.4.2-fh_align/fs/nfs/nfs3xdr.c Fri Feb 9 20:29:44 2001
> +++ linux-2.4.2-dir/fs/nfs/nfs3xdr.c Thu Feb 22 10:47:49 2001
> @@ -523,6 +523,13 @@
> return 0;
> }
>
> +/* Hack to sign-extending 32-bit cookies */
> +static inline
> +u64 nfs_transform_cookie64(u64 cookie)
> +{
> + return (cookie & 0x80000000) ? (cookie ^ 0xFFFFFFFF00000000) : cookie;
> +}
> +
> /*
> * Encode arguments to readdir call
> */
> @@ -533,7 +540,7 @@
> int buflen, replen;
>
> p = xdr_encode_fhandle(p, args->fh);
> - p = xdr_encode_hyper(p, args->cookie);
> + p = xdr_encode_hyper(p, nfs_transform_cookie64(args->cookie));
> *p++ = args->verf[0];
> *p++ = args->verf[1];
> if (args->plus) {
> @@ -635,6 +642,7 @@
> nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus)
> {
> struct nfs_entry old = *entry;
> + u64 cookie;
>
> if (!*p++) {
> if (!*p)
> @@ -648,7 +656,8 @@
> entry->name = (const char *) p;
> p += XDR_QUADLEN(entry->len);
> entry->prev_cookie = entry->cookie;
> - p = xdr_decode_hyper(p, &entry->cookie);
> + p = xdr_decode_hyper(p, cookie);
> + entry->cookie = nfs_transform_cookie64(cookie);
I don't understand this. As far as I can tell, "cookie" is not
initialized at all. Even if it is initialized, what does
p = xdr_decode_hyper(p, cookie);
do?
H.J.
On Tue, Feb 27, 2001 at 03:04:32PM -0800, H . J . Lu wrote:
> > entry->prev_cookie = entry->cookie;
> > - p = xdr_decode_hyper(p, &entry->cookie);
> > + p = xdr_decode_hyper(p, cookie);
> > + entry->cookie = nfs_transform_cookie64(cookie);
>
> I don't understand this. As far as I can tell, "cookie" is not
> initialized at all. Even if it is initialized, what does
>
> p = xdr_decode_hyper(p, cookie);
>
Trond, I think you missed
p = xdr_decode_hyper(p, &cookie);
^
Yes, it does seem to work.
--
H.J. Lu ([email protected])
>>>>> " " == H J Lu <[email protected]> writes:
> On Tue, Feb 27, 2001 at 03:04:32PM -0800, H . J . Lu wrote:
>> > entry->prev_cookie = entry->cookie;
>> > - p = xdr_decode_hyper(p, &entry->cookie);
>> > + p = xdr_decode_hyper(p, cookie);
>> > + entry->cookie = nfs_transform_cookie64(cookie);
>>
>> I don't understand this. As far as I can tell, "cookie" is not
>> initialized at all. Even if it is initialized, what does
>>
>> p = xdr_decode_hyper(p, cookie);
>>
> Trond, I think you missed
> p = xdr_decode_hyper(p, &cookie);
> ^
Oops. You're quite right. As far as I can see, this bug seems to have
crept in when I created the patch itself. My own source has the
correct dereference, hence I didn't see any errors.
I've also updated the copy on http://www.fys.uio.no/~trondmy/src/2.4.2
Thanks,
Trond