2014-06-14 18:16:55

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC 0/2] __vdso_findsym

The idea is to add AT_VDSO_FINDSYM pointing at __vdso_findsym. This
implements __vdso_findsym.

This would make it easier for runtimes that don't otherwise implement
ELF loaders to use the vdso.

Thoughts?

If people like the basic concept, I'll finish it, write up
documentation and a selftest. If people don't like it, I'll drop it.

Andy Lutomirski (2):
uapi: Add some missing dynamic table-related definitions to elf.h
[NOT READY] x86/vdso: Add __vdso_findsym

arch/x86/vdso/Makefile | 3 +
arch/x86/vdso/vdso-findsym.c | 136 ++++++++++++++++++++++++++++++++++++++++
arch/x86/vdso/vdso-layout.lds.S | 11 ++--
arch/x86/vdso/vdso.lds.S | 1 +
arch/x86/vdso/vdso2c.h | 2 +-
include/uapi/linux/elf.h | 121 +++++++++++++++++++++++++++++++++++
6 files changed, 268 insertions(+), 6 deletions(-)
create mode 100644 arch/x86/vdso/vdso-findsym.c

--
1.9.3


2014-06-14 18:16:59

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC 1/2] uapi: Add some missing dynamic table-related definitions to elf.h

This adds STN_UNDEF and ELF symbol version data structures.

Signed-off-by: Andy Lutomirski <[email protected]>
---
include/uapi/linux/elf.h | 121 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 121 insertions(+)

diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index ef6103b..8b7cf23 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -134,6 +134,12 @@ typedef __s64 Elf64_Sxword;
#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)

+/* Symbol table indices are found in the hash buckets and chain table
+ of a symbol hash table section. This special index value indicates
+ the end of a chain, meaning no further symbols are found in that bucket. */
+
+#define STN_UNDEF 0 /* End of a chain. */
+
typedef struct dynamic{
Elf32_Sword d_tag;
union{
@@ -197,6 +203,121 @@ typedef struct elf64_sym {
Elf64_Xword st_size; /* Associated symbol size */
} Elf64_Sym;

+/* Version definition sections. */
+
+typedef struct
+{
+ Elf32_Half vd_version; /* Version revision */
+ Elf32_Half vd_flags; /* Version information */
+ Elf32_Half vd_ndx; /* Version Index */
+ Elf32_Half vd_cnt; /* Number of associated aux entries */
+ Elf32_Word vd_hash; /* Version name hash value */
+ Elf32_Word vd_aux; /* Offset in bytes to verdaux array */
+ Elf32_Word vd_next; /* Offset in bytes to next verdef
+ entry */
+} Elf32_Verdef;
+
+typedef struct
+{
+ Elf64_Half vd_version; /* Version revision */
+ Elf64_Half vd_flags; /* Version information */
+ Elf64_Half vd_ndx; /* Version Index */
+ Elf64_Half vd_cnt; /* Number of associated aux entries */
+ Elf64_Word vd_hash; /* Version name hash value */
+ Elf64_Word vd_aux; /* Offset in bytes to verdaux array */
+ Elf64_Word vd_next; /* Offset in bytes to next verdef
+ entry */
+} Elf64_Verdef;
+
+
+/* Legal values for vd_version (version revision). */
+#define VER_DEF_NONE 0 /* No version */
+#define VER_DEF_CURRENT 1 /* Current version */
+#define VER_DEF_NUM 2 /* Given version number */
+
+/* Legal values for vd_flags (version information flags). */
+#define VER_FLG_BASE 0x1 /* Version definition of file itself */
+#define VER_FLG_WEAK 0x2 /* Weak version identifier */
+
+/* Versym symbol index values. */
+#define VER_NDX_LOCAL 0 /* Symbol is local. */
+#define VER_NDX_GLOBAL 1 /* Symbol is global. */
+#define VER_NDX_LORESERVE 0xff00 /* Beginning of reserved entries. */
+#define VER_NDX_ELIMINATE 0xff01 /* Symbol is to be eliminated. */
+
+/* Auxialiary version information. */
+
+typedef struct
+{
+ Elf32_Word vda_name; /* Version or dependency names */
+ Elf32_Word vda_next; /* Offset in bytes to next verdaux
+ entry */
+} Elf32_Verdaux;
+
+typedef struct
+{
+ Elf64_Word vda_name; /* Version or dependency names */
+ Elf64_Word vda_next; /* Offset in bytes to next verdaux
+ entry */
+} Elf64_Verdaux;
+
+
+/* Version dependency section. */
+
+typedef struct
+{
+ Elf32_Half vn_version; /* Version of structure */
+ Elf32_Half vn_cnt; /* Number of associated aux entries */
+ Elf32_Word vn_file; /* Offset of filename for this
+ dependency */
+ Elf32_Word vn_aux; /* Offset in bytes to vernaux array */
+ Elf32_Word vn_next; /* Offset in bytes to next verneed
+ entry */
+} Elf32_Verneed;
+
+typedef struct
+{
+ Elf64_Half vn_version; /* Version of structure */
+ Elf64_Half vn_cnt; /* Number of associated aux entries */
+ Elf64_Word vn_file; /* Offset of filename for this
+ dependency */
+ Elf64_Word vn_aux; /* Offset in bytes to vernaux array */
+ Elf64_Word vn_next; /* Offset in bytes to next verneed
+ entry */
+} Elf64_Verneed;
+
+
+/* Legal values for vn_version (version revision). */
+#define VER_NEED_NONE 0 /* No version */
+#define VER_NEED_CURRENT 1 /* Current version */
+#define VER_NEED_NUM 2 /* Given version number */
+
+/* Auxiliary needed version information. */
+
+typedef struct
+{
+ Elf32_Word vna_hash; /* Hash value of dependency name */
+ Elf32_Half vna_flags; /* Dependency specific information */
+ Elf32_Half vna_other; /* Unused */
+ Elf32_Word vna_name; /* Dependency name string offset */
+ Elf32_Word vna_next; /* Offset in bytes to next vernaux
+ entry */
+} Elf32_Vernaux;
+
+typedef struct
+{
+ Elf64_Word vna_hash; /* Hash value of dependency name */
+ Elf64_Half vna_flags; /* Dependency specific information */
+ Elf64_Half vna_other; /* Unused */
+ Elf64_Word vna_name; /* Dependency name string offset */
+ Elf64_Word vna_next; /* Offset in bytes to next vernaux
+ entry */
+} Elf64_Vernaux;
+
+
+/* Legal values for vna_flags. */
+#define VER_FLG_WEAK 0x2 /* Weak version identifier */
+

#define EI_NIDENT 16

--
1.9.3

2014-06-14 18:17:03

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC 2/2] [NOT READY] x86/vdso: Add __vdso_findsym

The intent is to add a painless way for userspace libraries to find
vdso symbols. This will work by adding a vdso function
__vdso_findsym and a new auxvec entry AT_VDSO_FINDSYM that points at
it.

This isn't done yet. __vdso_findsym works, but it's only available
for the 64-bit vdso, and AT_VDSO_FINDSYM isn't implemented.

Getting this to work for x32 will require hackery or an actual x32
toolchain to build it.

Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/vdso/Makefile | 3 +
arch/x86/vdso/vdso-findsym.c | 136 ++++++++++++++++++++++++++++++++++++++++
arch/x86/vdso/vdso-layout.lds.S | 11 ++--
arch/x86/vdso/vdso.lds.S | 1 +
arch/x86/vdso/vdso2c.h | 2 +-
5 files changed, 147 insertions(+), 6 deletions(-)
create mode 100644 arch/x86/vdso/vdso-findsym.c

diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index ba6fc27..62e6938 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -16,7 +16,9 @@ vdso-install-$(VDSO32-y) += $(vdso32-images)

# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o
+vobjs-y += vdso-findsym.o
vobjs-nox32 := vdso-fakesections.o
+vobjs-nox32 += vdso-findsym.o

# files to link into kernel
obj-y += vma.o
@@ -83,6 +85,7 @@ CFLAGS_REMOVE_vdso-note.o = -pg
CFLAGS_REMOVE_vclock_gettime.o = -pg
CFLAGS_REMOVE_vgetcpu.o = -pg
CFLAGS_REMOVE_vvar.o = -pg
+CFLAGS_REMOVE_vdso-findsym.o = -pg

#
# X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/vdso/vdso-findsym.c b/arch/x86/vdso/vdso-findsym.c
new file mode 100644
index 0000000..83b0c0a
--- /dev/null
+++ b/arch/x86/vdso/vdso-findsym.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2014 Andy Lutomirski
+ * Subject to the GNU Public License, v.2
+ *
+ * AT_VDSO_FINDSYM implementation
+ *
+ * AT_VDSO_FINDSYM is a mechanism that can be used by userspace runtimes
+ * that don't want to implement a full ELF parser. The ELF parser in
+ * here cheats heavily. It relies on the linker to do part of the work,
+ * and it makes questionable assumptions about the layout of some of the
+ * dynamic data structures. Those questionable assumptions are verified
+ * by vdso2c.
+ */
+
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
+#pragma GCC optimize ("Os")
+
+#include <linux/elf.h>
+#include <linux/compiler.h>
+
+struct elf_hash {
+ Elf64_Word nbuckets;
+ Elf64_Word nchains;
+ Elf64_Word data[];
+};
+
+/*
+ * These must be explicitly hidden: we need to access them via direct
+ * PC-relative relocations, not through the GOT, since we have no GOT.
+ */
+extern const char VDSO_START[] __attribute__((visibility("hidden")));
+extern const char DYN_STRTAB[] __attribute__((visibility("hidden")));
+extern Elf64_Sym DYN_SYMTAB[] __attribute__((visibility("hidden")));
+extern struct elf_hash DYN_HASH __attribute__((visibility("hidden")));
+extern Elf64_Half DYN_VERSYM[] __attribute__((visibility("hidden")));
+extern Elf64_Verdef DYN_VERDEF[] __attribute__((visibility("hidden")));
+
+/* Straight from the ELF specification. */
+static unsigned long elf_hash(const unsigned char *name)
+{
+ unsigned long h = 0, g;
+ while (*name)
+ {
+ h = (h << 4) + *name++;
+ if ((g = h & 0xf0000000) != 0)
+ h ^= g >> 24;
+ h &= ~g;
+ }
+ return h;
+}
+
+static bool strings_equal(const char *a, const char *b)
+{
+ while (true) {
+ if (*a != *b)
+ return false;
+ if (!*a)
+ return true;
+ a++;
+ b++;
+ }
+}
+
+static bool vdso_match_version(Elf64_Half ver,
+ const char *name, Elf64_Word hash)
+{
+ /*
+ * This is a helper function to check if the version indexed by
+ * ver matches name (which hashes to hash).
+ *
+ * The version definition table is a mess, and I don't know how
+ * to do this in better than linear time without allocating memory
+ * to build an index. I also don't know why the table has
+ * variable size entries in the first place.
+ *
+ * For added fun, I can't find a comprehensible specification of how
+ * to parse all the weird flags in the table.
+ *
+ * So I just parse the whole table every time.
+ */
+
+ Elf64_Verdef *def = DYN_VERDEF;
+ Elf64_Verdaux *aux;
+
+ /* First step: find the version definition */
+ ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
+ while(true) {
+ if ((def->vd_flags & VER_FLG_BASE) == 0
+ && (def->vd_ndx & 0x7fff) == ver)
+ break;
+
+ if (def->vd_next == 0)
+ return false; /* No definition. */
+
+ def = (Elf64_Verdef *)((char *)def + def->vd_next);
+ }
+
+ /* Now figure out whether it matches. */
+ aux = (Elf64_Verdaux*)((char *)def + def->vd_aux);
+ return def->vd_hash == hash
+ && strings_equal(name, DYN_STRTAB + aux->vda_name);
+}
+
+void *__vdso_findsym(const char *name, const char *version)
+{
+ Elf64_Word chain = DYN_HASH.data[elf_hash(name) % DYN_HASH.nbuckets];
+ const Elf64_Word *chain_next = &DYN_HASH.data[DYN_HASH.nbuckets + 2];
+ unsigned long ver_hash = elf_hash(version);
+
+ for (; chain != STN_UNDEF; chain = chain_next[chain]) {
+ Elf64_Sym *sym = &DYN_SYMTAB[chain];
+
+ /* Check for a defined global or weak function w/ right name. */
+ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+ continue;
+ if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+ continue;
+ if (sym->st_shndx == SHN_UNDEF)
+ continue;
+ if (!strings_equal(name, DYN_STRTAB + sym->st_name))
+ continue;
+
+ /* Check symbol version. */
+ if (!vdso_match_version(DYN_VERSYM[chain],
+ version, ver_hash))
+ continue;
+
+ return (void *)((unsigned long)VDSO_START + sym->st_value);
+ }
+
+ return 0;
+}
+
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 2ec72f6..2e8b692 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -8,14 +8,15 @@

SECTIONS
{
+ VDSO_START = .;
. = SIZEOF_HEADERS;

- .hash : { *(.hash) } :text
+ .hash : { DYN_HASH = .; *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
+ .dynsym : { DYN_SYMTAB = .; *(.dynsym) }
+ .dynstr : { DYN_STRTAB = .; *(.dynstr) }
+ .gnu.version : { DYN_VERSYM = .; *(.gnu.version) }
+ .gnu.version_d : { DYN_VERDEF = .; *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }

.note : { *(.note.*) } :text :note
diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S
index 75e3404..9b23d4e 100644
--- a/arch/x86/vdso/vdso.lds.S
+++ b/arch/x86/vdso/vdso.lds.S
@@ -22,6 +22,7 @@ VERSION {
__vdso_getcpu;
time;
__vdso_time;
+ __vdso_findsym;
local: *;
};
}
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index c6eefaf..931abac 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -51,7 +51,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
for (i = 0; dyn + i < dyn_end &&
GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag);
- if (tag == DT_REL || tag == DT_RELSZ ||
+ if (tag == DT_REL || tag == DT_RELA || tag == DT_RELSZ ||
tag == DT_RELENT || tag == DT_TEXTREL)
fail("vdso image contains dynamic relocations\n");
}
--
1.9.3

2014-06-14 21:30:06

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sat, Jun 14, 2014 at 11:16:42AM -0700, Andy Lutomirski wrote:
> The idea is to add AT_VDSO_FINDSYM pointing at __vdso_findsym. This
> implements __vdso_findsym.
>
> This would make it easier for runtimes that don't otherwise implement
> ELF loaders to use the vdso.
>
> Thoughts?
>
> If people like the basic concept, I'll finish it, write up
> documentation and a selftest. If people don't like it, I'll drop it.

I like the idea. I don't think it's immediately useful, since
libraries wanting to use vdso will likely want to support older
kernels that don't yet have this, and thus need to include their own
vdso parsing code. However, if we could get this in now, it would be
very useful a few years down the line where optimizing for old kernels
is not an important goal. I'd certainly like to be able to drop the
ELF parsing code from musl to reduce static binary size. And I could
see other projects possibly using it just to avoid the issue of ever
adding ELF parsing code in the first place.

Rich

2014-06-14 22:39:11

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sat, Jun 14, 2014 at 2:30 PM, Rich Felker <[email protected]> wrote:
> On Sat, Jun 14, 2014 at 11:16:42AM -0700, Andy Lutomirski wrote:
>> The idea is to add AT_VDSO_FINDSYM pointing at __vdso_findsym. This
>> implements __vdso_findsym.
>>
>> This would make it easier for runtimes that don't otherwise implement
>> ELF loaders to use the vdso.
>>
>> Thoughts?
>>
>> If people like the basic concept, I'll finish it, write up
>> documentation and a selftest. If people don't like it, I'll drop it.
>
> I like the idea. I don't think it's immediately useful, since
> libraries wanting to use vdso will likely want to support older
> kernels that don't yet have this, and thus need to include their own
> vdso parsing code. However, if we could get this in now, it would be
> very useful a few years down the line where optimizing for old kernels
> is not an important goal. I'd certainly like to be able to drop the
> ELF parsing code from musl to reduce static binary size. And I could
> see other projects possibly using it just to avoid the issue of ever
> adding ELF parsing code in the first place.
>

Hmm. I hadn't thought of this as a size win, but I guess it could be
if there are static binaries around.

This patch currently generates 488 bytes on text with my toolchain.
More than half of that is garbage needed to deal with the symbol
version tables, and I should be able to move a decent fraction of it
to vdso2c and therefore out of the kernel and vdso images.

--Andy

2014-06-14 23:36:43

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On 06/14/2014 03:38 PM, Andy Lutomirski wrote:
>
> Hmm. I hadn't thought of this as a size win, but I guess it could be
> if there are static binaries around.
>

That pretty much *is* the win, because this is in effect treating the
kernel vdso as a shared library.

> This patch currently generates 488 bytes on text with my toolchain.
> More than half of that is garbage needed to deal with the symbol
> version tables, and I should be able to move a decent fraction of it
> to vdso2c and therefore out of the kernel and vdso images.

Now, if we can do this without adding another page then that becomes
much more compelling IMO. I suspect we can if you are already down to
488 bytes.

-hpa

2014-06-15 03:59:54

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On 06/14/2014 04:36 PM, H. Peter Anvin wrote:
>
> Now, if we can do this without adding another page then that becomes
> much more compelling IMO. I suspect we can if you are already down to
> 488 bytes.
>

(If it isn't already the case, was inteded to be part of that...)

-hpa

2014-06-15 06:50:17

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sat, Jun 14, 2014 at 8:59 PM, H. Peter Anvin <[email protected]> wrote:
> On 06/14/2014 04:36 PM, H. Peter Anvin wrote:
>>
>> Now, if we can do this without adding another page then that becomes
>> much more compelling IMO. I suspect we can if you are already down to
>> 488 bytes.
>>
>
> (If it isn't already the case, was inteded to be part of that...)

It's down to 375 bytes now. I'm not sure how to do better than that
without starting to hardcode variously nasty assumptions.

With CONFIG_PARAVIRT_CLOCK=y I think the 64-bit vdso is two pages
anyway. With CONFIG_PARAVIRT_CLOCK=n, it's one page even with my
code.

(At some point I plan on tackling the paravirt clock code. It's much
more complicated and much slower than it needs to be.)

--Andy

2014-06-15 14:25:46

by Mikael Pettersson

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

Andy Lutomirski writes:
> The idea is to add AT_VDSO_FINDSYM pointing at __vdso_findsym. This
> implements __vdso_findsym.
>
> This would make it easier for runtimes that don't otherwise implement
> ELF loaders to use the vdso.
>
> Thoughts?

I'm opposed to this based on the principle that the kernel should NOT
be a dumping ground for random code that user-space can and should
implement for itself. As long as the vdso is correctly formatted ELF,
then parsing it is easy, and the kernel should not care at all if or
how user-space accesses it.

The fact that golang got it wrong is not an argument for moving this
functionality into the kernel.

/Mikael

2014-06-15 14:35:10

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 04:25:37PM +0200, Mikael Pettersson wrote:
> Andy Lutomirski writes:
> > The idea is to add AT_VDSO_FINDSYM pointing at __vdso_findsym. This
> > implements __vdso_findsym.
> >
> > This would make it easier for runtimes that don't otherwise implement
> > ELF loaders to use the vdso.
> >
> > Thoughts?
>
> I'm opposed to this based on the principle that the kernel should NOT
> be a dumping ground for random code that user-space can and should
> implement for itself. As long as the vdso is correctly formatted ELF,
> then parsing it is easy, and the kernel should not care at all if or
> how user-space accesses it.

Arguably, it was a mistake for the kernel to expose a virtual ELF to
begin with, and it should just have exposed a "lookup function by
name" operation to begin with. Yes this can be done in userspace, but
I see it more as a matter of "fixing a broken API design".

Rich

2014-06-15 15:47:43

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 7:35 AM, Rich Felker <[email protected]> wrote:
> On Sun, Jun 15, 2014 at 04:25:37PM +0200, Mikael Pettersson wrote:
>> Andy Lutomirski writes:
>> > The idea is to add AT_VDSO_FINDSYM pointing at __vdso_findsym. This
>> > implements __vdso_findsym.
>> >
>> > This would make it easier for runtimes that don't otherwise implement
>> > ELF loaders to use the vdso.
>> >
>> > Thoughts?
>>
>> I'm opposed to this based on the principle that the kernel should NOT
>> be a dumping ground for random code that user-space can and should
>> implement for itself. As long as the vdso is correctly formatted ELF,
>> then parsing it is easy, and the kernel should not care at all if or
>> how user-space accesses it.
>
> Arguably, it was a mistake for the kernel to expose a virtual ELF to
> begin with, and it should just have exposed a "lookup function by
> name" operation to begin with. Yes this can be done in userspace, but
> I see it more as a matter of "fixing a broken API design".

Is the __vdso_findsym(name, version) approach good? I don't know why
the vdso uses symbol versions at all, nor do I know why it uses weak
symbol aliases. The vdso's "clock_gettime" isn't a drop-in
replacement for glibc's: it doesn't touch error, and it returns
negative error codes.

It might make more sense to promise that the vdso will never export to
functions with the same name but different versions and to just get
rid of the version field from __vdso_findsym. I tried implementing
this; it's untested, but the text size is only 230 bytes :)

It might also make sense to look up by number. Yes, it smells like
Windows DLLs, but it also looks awfully like Linux syscalls, which do
just fine using a call-by-number scheme.

--Andy

2014-06-15 17:06:00

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On 06/15/2014 07:35 AM, Rich Felker wrote:
>
> Arguably, it was a mistake for the kernel to expose a virtual ELF to
> begin with, and it should just have exposed a "lookup function by
> name" operation to begin with. Yes this can be done in userspace, but
> I see it more as a matter of "fixing a broken API design".
>

What the fsck are you smoking? There is immense value in providing a
stable and very well-defined data structure, which also happens to be
what dynamic linkers already want to consume. Providing a helper for
crippled libc applications has potential value. Shaving a few hundred
bytes off static applications is a very weak argument, simply because it
is such a small fraction of the enormous cost of a static application,
and static applications are problematic in a number of other ways,
especially the lack of ability to fix bugs.

Treating the kernel as an ersatz dynamic library for "static"
applications is kind of silly -- after all, why not provide an entire
libc in the vdso? I have actually seen people advocate for doing that.

-hpa

2014-06-15 17:40:26

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 10:05 AM, H. Peter Anvin <[email protected]> wrote:
> On 06/15/2014 07:35 AM, Rich Felker wrote:
>>
>> Arguably, it was a mistake for the kernel to expose a virtual ELF to
>> begin with, and it should just have exposed a "lookup function by
>> name" operation to begin with. Yes this can be done in userspace, but
>> I see it more as a matter of "fixing a broken API design".
>>
>
> What the fsck are you smoking? There is immense value in providing a
> stable and very well-defined data structure, which also happens to be
> what dynamic linkers already want to consume. Providing a helper for
> crippled libc applications has potential value. Shaving a few hundred
> bytes off static applications is a very weak argument, simply because it
> is such a small fraction of the enormous cost of a static application,
> and static applications are problematic in a number of other ways,
> especially the lack of ability to fix bugs.
>
> Treating the kernel as an ersatz dynamic library for "static"
> applications is kind of silly -- after all, why not provide an entire
> libc in the vdso? I have actually seen people advocate for doing that.

To be clear, I have no desire whatsoever to give the vdso an actual
ELF parser or anything else that userspace should be providing itself.
I think that a special-purpose vdso parser in the vdso makes some
sense, though, since userspace might otherwise provide one for the
sole purpose of parsing the vdso.

And there's plenty of reasons that having the vdso be an ELF image is
useful. For one thing, gdb can take advantage of it. For another,
CRIU is parsing it for a rather different reason, and something like
__vdso_findsym won't fill that need.

Also, given the general lack of a comprehensible specification of what
the GNU flavor of the ELF format actually is [1], there's something to
be said for reducing the proliferation of ELF parsers. glibc and
binutils are quite unlikely to become incompatible with each other,
but I sincerely doubt that anyone from binutils land is likely to
review (and maintain!) my ELF parser, Go's, or a hypothetical future
ELF parser from any of the other glibc-less things. If those things
use one that's in the kernel, then it's easy for the kernel to
guarantee that each vdso image can successfully parse itself.

[1] The only comprehensible description of the GNU hash extension that
I could find is on Oracle's blog (!)

--Andy

2014-06-15 17:58:14

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On 06/15/2014 10:40 AM, Andy Lutomirski wrote:
>
> To be clear, I have no desire whatsoever to give the vdso an actual
> ELF parser or anything else that userspace should be providing itself.
> I think that a special-purpose vdso parser in the vdso makes some
> sense, though, since userspace might otherwise provide one for the
> sole purpose of parsing the vdso.
>
> And there's plenty of reasons that having the vdso be an ELF image is
> useful. For one thing, gdb can take advantage of it. For another,
> CRIU is parsing it for a rather different reason, and something like
> __vdso_findsym won't fill that need.
>
> Also, given the general lack of a comprehensible specification of what
> the GNU flavor of the ELF format actually is [1], there's something to
> be said for reducing the proliferation of ELF parsers. glibc and
> binutils are quite unlikely to become incompatible with each other,
> but I sincerely doubt that anyone from binutils land is likely to
> review (and maintain!) my ELF parser, Go's, or a hypothetical future
> ELF parser from any of the other glibc-less things. If those things
> use one that's in the kernel, then it's easy for the kernel to
> guarantee that each vdso image can successfully parse itself.
>
> [1] The only comprehensible description of the GNU hash extension that
> I could find is on Oracle's blog (!)
>

Yes, but that is why we provide the standard SysV hash. The GNU hash is
not too bad, but you're absolutely right the documentation stinks.

Providing a simple symbol lookup is an opportunistic thing, and might be
useful that way, and only because (as you say) the version in the vdso
would only need to be guaranteed to parse a single data structure --
that same vdso.

On the other hand, it better work, correctly, in every version of the
kernel, so I believe it will need to be done such that it is either
correct by construction or gets self-tested during the build process so
it errors out on failure. One simple way to do correct by construction
would be to do the "vdso entry point by index" -- a new kind of system
call numbers, in effect, as much as it has shades of Windows DLL with
their "ordinal numbers".

-hpa

2014-06-15 18:20:56

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym



On June 15, 2014 10:40:03 AM PDT, Andy Lutomirski <[email protected]> wrote:
>On Sun, Jun 15, 2014 at 10:05 AM, H. Peter Anvin <[email protected]> wrote:
>> On 06/15/2014 07:35 AM, Rich Felker wrote:
>>>
>>> Arguably, it was a mistake for the kernel to expose a virtual ELF to
>>> begin with, and it should just have exposed a "lookup function by
>>> name" operation to begin with. Yes this can be done in userspace,
>but
>>> I see it more as a matter of "fixing a broken API design".
>>>
>>
>> What the fsck are you smoking? There is immense value in providing a
>> stable and very well-defined data structure, which also happens to be
>> what dynamic linkers already want to consume. Providing a helper for
>> crippled libc applications has potential value. Shaving a few
>hundred
>> bytes off static applications is a very weak argument, simply because
>it
>> is such a small fraction of the enormous cost of a static
>application,
>> and static applications are problematic in a number of other ways,
>> especially the lack of ability to fix bugs.
>>
>> Treating the kernel as an ersatz dynamic library for "static"
>> applications is kind of silly -- after all, why not provide an entire
>> libc in the vdso? I have actually seen people advocate for doing
>that.
>
>To be clear, I have no desire whatsoever to give the vdso an actual
>ELF parser or anything else that userspace should be providing itself.
>I think that a special-purpose vdso parser in the vdso makes some
>sense, though, since userspace might otherwise provide one for the
>sole purpose of parsing the vdso.
>
>And there's plenty of reasons that having the vdso be an ELF image is
>useful. For one thing, gdb can take advantage of it. For another,
>CRIU is parsing it for a rather different reason, and something like
>__vdso_findsym won't fill that need.
>
>Also, given the general lack of a comprehensible specification of what
>the GNU flavor of the ELF format actually is [1], there's something to
>be said for reducing the proliferation of ELF parsers. glibc and
>binutils are quite unlikely to become incompatible with each other,
>but I sincerely doubt that anyone from binutils land is likely to
>review (and maintain!) my ELF parser, Go's, or a hypothetical future
>ELF parser from any of the other glibc-less things. If those things
>use one that's in the kernel, then it's easy for the kernel to
>guarantee that each vdso image can successfully parse itself.
>
>[1] The only comprehensible description of the GNU hash extension that
>I could find is on Oracle's blog (!)
>

Curious about this blog. We do have a GNU hash implementation in Syslinux, too, for another reference.

--
Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-06-15 18:21:11

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

cc: Andi Kleen, who designed the vdso

On Sun, Jun 15, 2014 at 10:57 AM, H. Peter Anvin <[email protected]> wrote:
> On 06/15/2014 10:40 AM, Andy Lutomirski wrote:
>>
>> To be clear, I have no desire whatsoever to give the vdso an actual
>> ELF parser or anything else that userspace should be providing itself.
>> I think that a special-purpose vdso parser in the vdso makes some
>> sense, though, since userspace might otherwise provide one for the
>> sole purpose of parsing the vdso.
>>
>> And there's plenty of reasons that having the vdso be an ELF image is
>> useful. For one thing, gdb can take advantage of it. For another,
>> CRIU is parsing it for a rather different reason, and something like
>> __vdso_findsym won't fill that need.
>>
>> Also, given the general lack of a comprehensible specification of what
>> the GNU flavor of the ELF format actually is [1], there's something to
>> be said for reducing the proliferation of ELF parsers. glibc and
>> binutils are quite unlikely to become incompatible with each other,
>> but I sincerely doubt that anyone from binutils land is likely to
>> review (and maintain!) my ELF parser, Go's, or a hypothetical future
>> ELF parser from any of the other glibc-less things. If those things
>> use one that's in the kernel, then it's easy for the kernel to
>> guarantee that each vdso image can successfully parse itself.
>>
>> [1] The only comprehensible description of the GNU hash extension that
>> I could find is on Oracle's blog (!)
>>
>
> Yes, but that is why we provide the standard SysV hash. The GNU hash is
> not too bad, but you're absolutely right the documentation stinks.
>
> Providing a simple symbol lookup is an opportunistic thing, and might be
> useful that way, and only because (as you say) the version in the vdso
> would only need to be guaranteed to parse a single data structure --
> that same vdso.
>
> On the other hand, it better work, correctly, in every version of the
> kernel, so I believe it will need to be done such that it is either
> correct by construction or gets self-tested during the build process so
> it errors out on failure.

I was thinking of adding something to selftests that would check that
__vdso_findsym can find every exported symbol, check that it can't
find the ones it shouldn't find, and call it on a bunch of garbage
strings to make sure it rejects them.


> One simple way to do correct by construction
> would be to do the "vdso entry point by index" -- a new kind of system
> call numbers, in effect, as much as it has shades of Windows DLL with
> their "ordinal numbers".

It's certainly easy. It's a little gross, and I sort of feel bad
about having two parallel ways of referring to a vdso function -- one
used by ELF parsers and one used by the new thing. Using an array
also wins on speed and code size. *sigh* -- I'm torn on this one.

Do you know why the vdso uses symbol versioning and weak symbols in
the first place? This seems to date back all the way to the beginning
(2aae950b21e4bc789d1fc6668faf67e8748300b7). If we're going to add a
new way to find vdso symbols, I would like to at least drop support
for versions.

--Andy

2014-06-15 18:23:12

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 11:20 AM, H. Peter Anvin <[email protected]> wrote:
>
>
> On June 15, 2014 10:40:03 AM PDT, Andy Lutomirski <[email protected]> wrote:
>>On Sun, Jun 15, 2014 at 10:05 AM, H. Peter Anvin <[email protected]> wrote:
>>> On 06/15/2014 07:35 AM, Rich Felker wrote:
>>>>
>>>> Arguably, it was a mistake for the kernel to expose a virtual ELF to
>>>> begin with, and it should just have exposed a "lookup function by
>>>> name" operation to begin with. Yes this can be done in userspace,
>>but
>>>> I see it more as a matter of "fixing a broken API design".
>>>>
>>>
>>> What the fsck are you smoking? There is immense value in providing a
>>> stable and very well-defined data structure, which also happens to be
>>> what dynamic linkers already want to consume. Providing a helper for
>>> crippled libc applications has potential value. Shaving a few
>>hundred
>>> bytes off static applications is a very weak argument, simply because
>>it
>>> is such a small fraction of the enormous cost of a static
>>application,
>>> and static applications are problematic in a number of other ways,
>>> especially the lack of ability to fix bugs.
>>>
>>> Treating the kernel as an ersatz dynamic library for "static"
>>> applications is kind of silly -- after all, why not provide an entire
>>> libc in the vdso? I have actually seen people advocate for doing
>>that.
>>
>>To be clear, I have no desire whatsoever to give the vdso an actual
>>ELF parser or anything else that userspace should be providing itself.
>>I think that a special-purpose vdso parser in the vdso makes some
>>sense, though, since userspace might otherwise provide one for the
>>sole purpose of parsing the vdso.
>>
>>And there's plenty of reasons that having the vdso be an ELF image is
>>useful. For one thing, gdb can take advantage of it. For another,
>>CRIU is parsing it for a rather different reason, and something like
>>__vdso_findsym won't fill that need.
>>
>>Also, given the general lack of a comprehensible specification of what
>>the GNU flavor of the ELF format actually is [1], there's something to
>>be said for reducing the proliferation of ELF parsers. glibc and
>>binutils are quite unlikely to become incompatible with each other,
>>but I sincerely doubt that anyone from binutils land is likely to
>>review (and maintain!) my ELF parser, Go's, or a hypothetical future
>>ELF parser from any of the other glibc-less things. If those things
>>use one that's in the kernel, then it's easy for the kernel to
>>guarantee that each vdso image can successfully parse itself.
>>
>>[1] The only comprehensible description of the GNU hash extension that
>>I could find is on Oracle's blog (!)
>>
>
> Curious about this blog. We do have a GNU hash implementation in Syslinux, too, for another reference.
>

https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections

FWIW, I bet that __vdso_findsym could be smaller if it used the GNU
hash. Maybe it would save about the same amount of space that turning
on the GNU hash would take up.

--Andy

2014-06-15 18:40:29

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

Symbol versioning so we can rev the ABI and still provide backwards compatibility. Weak symbols so the libc can override symbols if it considers it appropriate. This is a good thing.

On June 15, 2014 11:20:41 AM PDT, Andy Lutomirski <[email protected]> wrote:
>cc: Andi Kleen, who designed the vdso
>
>On Sun, Jun 15, 2014 at 10:57 AM, H. Peter Anvin <[email protected]> wrote:
>> On 06/15/2014 10:40 AM, Andy Lutomirski wrote:
>>>
>>> To be clear, I have no desire whatsoever to give the vdso an actual
>>> ELF parser or anything else that userspace should be providing
>itself.
>>> I think that a special-purpose vdso parser in the vdso makes some
>>> sense, though, since userspace might otherwise provide one for the
>>> sole purpose of parsing the vdso.
>>>
>>> And there's plenty of reasons that having the vdso be an ELF image
>is
>>> useful. For one thing, gdb can take advantage of it. For another,
>>> CRIU is parsing it for a rather different reason, and something like
>>> __vdso_findsym won't fill that need.
>>>
>>> Also, given the general lack of a comprehensible specification of
>what
>>> the GNU flavor of the ELF format actually is [1], there's something
>to
>>> be said for reducing the proliferation of ELF parsers. glibc and
>>> binutils are quite unlikely to become incompatible with each other,
>>> but I sincerely doubt that anyone from binutils land is likely to
>>> review (and maintain!) my ELF parser, Go's, or a hypothetical future
>>> ELF parser from any of the other glibc-less things. If those things
>>> use one that's in the kernel, then it's easy for the kernel to
>>> guarantee that each vdso image can successfully parse itself.
>>>
>>> [1] The only comprehensible description of the GNU hash extension
>that
>>> I could find is on Oracle's blog (!)
>>>
>>
>> Yes, but that is why we provide the standard SysV hash. The GNU hash
>is
>> not too bad, but you're absolutely right the documentation stinks.
>>
>> Providing a simple symbol lookup is an opportunistic thing, and might
>be
>> useful that way, and only because (as you say) the version in the
>vdso
>> would only need to be guaranteed to parse a single data structure --
>> that same vdso.
>>
>> On the other hand, it better work, correctly, in every version of the
>> kernel, so I believe it will need to be done such that it is either
>> correct by construction or gets self-tested during the build process
>so
>> it errors out on failure.
>
>I was thinking of adding something to selftests that would check that
>__vdso_findsym can find every exported symbol, check that it can't
>find the ones it shouldn't find, and call it on a bunch of garbage
>strings to make sure it rejects them.
>
>
>> One simple way to do correct by construction
>> would be to do the "vdso entry point by index" -- a new kind of
>system
>> call numbers, in effect, as much as it has shades of Windows DLL with
>> their "ordinal numbers".
>
>It's certainly easy. It's a little gross, and I sort of feel bad
>about having two parallel ways of referring to a vdso function -- one
>used by ELF parsers and one used by the new thing. Using an array
>also wins on speed and code size. *sigh* -- I'm torn on this one.
>
>Do you know why the vdso uses symbol versioning and weak symbols in
>the first place? This seems to date back all the way to the beginning
>(2aae950b21e4bc789d1fc6668faf67e8748300b7). If we're going to add a
>new way to find vdso symbols, I would like to at least drop support
>for versions.
>
>--Andy

--
Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-06-15 18:54:33

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 11:39 AM, H. Peter Anvin <[email protected]> wrote:
> Symbol versioning so we can rev the ABI and still provide backwards compatibility. Weak symbols so the libc can override symbols if it considers it appropriate. This is a good thing.

Are we ever going to change, say, the __vdso_clock_gettime ABI without
renaming the function? If we want to preserve that ability, I can
keep support for versions, but it seems odd.

I don't buy the weak symbol argument at all. We currently expose a
strong symbol __vdso_clock_gettime and a weak alias clock_gettime. I
agree that, if glibc treats us as a real DSO, then clock_gettime can't
be strong, but I don't see why it should exist at all (other than for
backwards compatibility).

--Andy

2014-06-15 19:14:43

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

If it doesn't, then you incur an additional indirection penalty. The strong __vdso symbol allows the libc wrapper to fall back to the vdso implementation, the weak symbol allows three to be no wrapper at all. This is good.

The reason for changing ABI would be shifting types. This is very much how glibc manages transitions.



On June 15, 2014 11:54:10 AM PDT, Andy Lutomirski <[email protected]> wrote:
>On Sun, Jun 15, 2014 at 11:39 AM, H. Peter Anvin <[email protected]> wrote:
>> Symbol versioning so we can rev the ABI and still provide backwards
>compatibility. Weak symbols so the libc can override symbols if it
>considers it appropriate. This is a good thing.
>
>Are we ever going to change, say, the __vdso_clock_gettime ABI without
>renaming the function? If we want to preserve that ability, I can
>keep support for versions, but it seems odd.
>
>I don't buy the weak symbol argument at all. We currently expose a
>strong symbol __vdso_clock_gettime and a weak alias clock_gettime. I
>agree that, if glibc treats us as a real DSO, then clock_gettime can't
>be strong, but I don't see why it should exist at all (other than for
>backwards compatibility).
>
>--Andy

--
Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-06-15 19:22:20

by Ian Lance Taylor

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 12:14 PM, H. Peter Anvin <[email protected]> wrote:
>
> If it doesn't, then you incur an additional indirection penalty. The strong __vdso symbol allows the libc wrapper to fall back to the vdso implementation, the weak symbol allows three to be no wrapper at all. This is good.
>
> The reason for changing ABI would be shifting types. This is very much how glibc manages transitions.

The purpose of symbol versioning is so that symbols with well known
names, like stat, can continue to use those same names while changing
types. Both old and new programs can continue to use the name stat
and continue to work even though they use different types.

I don't see how this applies to the kernel VDSO. Those symbols do not
use well-known names; they use names like __vdso_time. If you change
the types used by those symbols, you can change the name as well.
What is the downside?

Ian

2014-06-15 19:31:50

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

The weak symbols are well-known names. The __vdso symbols are strong.

On June 15, 2014 12:22:17 PM PDT, Ian Lance Taylor <[email protected]> wrote:
>On Sun, Jun 15, 2014 at 12:14 PM, H. Peter Anvin <[email protected]> wrote:
>>
>> If it doesn't, then you incur an additional indirection penalty. The
>strong __vdso symbol allows the libc wrapper to fall back to the vdso
>implementation, the weak symbol allows three to be no wrapper at all.
>This is good.
>>
>> The reason for changing ABI would be shifting types. This is very
>much how glibc manages transitions.
>
>The purpose of symbol versioning is so that symbols with well known
>names, like stat, can continue to use those same names while changing
>types. Both old and new programs can continue to use the name stat
>and continue to work even though they use different types.
>
>I don't see how this applies to the kernel VDSO. Those symbols do not
>use well-known names; they use names like __vdso_time. If you change
>the types used by those symbols, you can change the name as well.
>What is the downside?
>
>Ian

--
Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-06-15 19:50:40

by Ian Lance Taylor

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 12:31 PM, H. Peter Anvin <[email protected]> wrote:
> The weak symbols are well-known names. The __vdso symbols are strong.

I see. But I don't understand how this is supposed to work. When I
link a program against gettimeofday, I get a reference to gettimeofday
with version GLIBC_2.2.5. After all, I only link against libc.so; I
don't link against the vDSO. The VDSO provides gettimeofday with
version LINUX_2.6. Since those versions don't match, the gettimeofday
reference in my executable will not be satisfied by the definition in
the vDSO. So at dynamic link time my program is always going to be
linked with the gettimeofday in libc.so, which will in turn call the
gettimeofday in the vDSO.

Am I missing something that makes the definition of gettimeofday with
version LINUX_2.6 in the vDSO useful?

Ian



> On June 15, 2014 12:22:17 PM PDT, Ian Lance Taylor <[email protected]> wrote:
>>On Sun, Jun 15, 2014 at 12:14 PM, H. Peter Anvin <[email protected]> wrote:
>>>
>>> If it doesn't, then you incur an additional indirection penalty. The
>>strong __vdso symbol allows the libc wrapper to fall back to the vdso
>>implementation, the weak symbol allows three to be no wrapper at all.
>>This is good.
>>>
>>> The reason for changing ABI would be shifting types. This is very
>>much how glibc manages transitions.
>>
>>The purpose of symbol versioning is so that symbols with well known
>>names, like stat, can continue to use those same names while changing
>>types. Both old and new programs can continue to use the name stat
>>and continue to work even though they use different types.
>>
>>I don't see how this applies to the kernel VDSO. Those symbols do not
>>use well-known names; they use names like __vdso_time. If you change
>>the types used by those symbols, you can change the name as well.
>>What is the downside?
>>
>>Ian
>
> --
> Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-06-15 19:57:31

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

You can link against vdso.so if you want to; the kernel build provides an option to install that image. Doesn't mean that any particular libc uses it.

On June 15, 2014 12:50:36 PM PDT, Ian Lance Taylor <[email protected]> wrote:
>On Sun, Jun 15, 2014 at 12:31 PM, H. Peter Anvin <[email protected]> wrote:
>> The weak symbols are well-known names. The __vdso symbols are
>strong.
>
>I see. But I don't understand how this is supposed to work. When I
>link a program against gettimeofday, I get a reference to gettimeofday
>with version GLIBC_2.2.5. After all, I only link against libc.so; I
>don't link against the vDSO. The VDSO provides gettimeofday with
>version LINUX_2.6. Since those versions don't match, the gettimeofday
>reference in my executable will not be satisfied by the definition in
>the vDSO. So at dynamic link time my program is always going to be
>linked with the gettimeofday in libc.so, which will in turn call the
>gettimeofday in the vDSO.
>
>Am I missing something that makes the definition of gettimeofday with
>version LINUX_2.6 in the vDSO useful?
>
>Ian
>
>
>
>> On June 15, 2014 12:22:17 PM PDT, Ian Lance Taylor <[email protected]>
>wrote:
>>>On Sun, Jun 15, 2014 at 12:14 PM, H. Peter Anvin <[email protected]>
>wrote:
>>>>
>>>> If it doesn't, then you incur an additional indirection penalty.
>The
>>>strong __vdso symbol allows the libc wrapper to fall back to the vdso
>>>implementation, the weak symbol allows three to be no wrapper at all.
>>>This is good.
>>>>
>>>> The reason for changing ABI would be shifting types. This is very
>>>much how glibc manages transitions.
>>>
>>>The purpose of symbol versioning is so that symbols with well known
>>>names, like stat, can continue to use those same names while changing
>>>types. Both old and new programs can continue to use the name stat
>>>and continue to work even though they use different types.
>>>
>>>I don't see how this applies to the kernel VDSO. Those symbols do
>not
>>>use well-known names; they use names like __vdso_time. If you change
>>>the types used by those symbols, you can change the name as well.
>>>What is the downside?
>>>
>>>Ian
>>
>> --
>> Sent from my mobile phone. Please pardon brevity and lack of
>formatting.

--
Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-06-15 20:53:41

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 12:56 PM, H. Peter Anvin <[email protected]> wrote:
> You can link against vdso.so if you want to; the kernel build provides an option to install that image. Doesn't mean that any particular libc uses it.

I'd be rather surprised if anyone does this, given the generally
spotty state of vdso.so installation and that there's no such thing as
an ELF import library.

Also, the vdso's clock_gettime isn't actually compatible with
glibc/POSIX -- the return value and errno handling don't match.

--Andy

2014-06-15 21:14:08

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

Yes, but the option exists.

On June 15, 2014 1:53:17 PM PDT, Andy Lutomirski <[email protected]> wrote:
>On Sun, Jun 15, 2014 at 12:56 PM, H. Peter Anvin <[email protected]> wrote:
>> You can link against vdso.so if you want to; the kernel build
>provides an option to install that image. Doesn't mean that any
>particular libc uses it.
>
>I'd be rather surprised if anyone does this, given the generally
>spotty state of vdso.so installation and that there's no such thing as
>an ELF import library.
>
>Also, the vdso's clock_gettime isn't actually compatible with
>glibc/POSIX -- the return value and errno handling don't match.
>
>--Andy

--
Sent from my mobile phone. Please pardon brevity and lack of formatting.

2014-06-16 00:56:37

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 12:14:04PM -0700, H. Peter Anvin wrote:
> If it doesn't, then you incur an additional indirection penalty. The
> strong __vdso symbol allows the libc wrapper to fall back to the
> vdso implementation, the weak symbol allows three to be no wrapper
> at all. This is good.

No it doesn't. The weak symbol does not implement the interface
contract of the libc function; on error it's required to set errno,
and of course the kernel/vdso cannot do this. Also, there's no way the
symbol versions the kernel provides can match up with userspace symbol
versions that consumers of the function would need. So these features
are just useless.

Rich

2014-06-16 01:07:15

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 10:05:47AM -0700, H. Peter Anvin wrote:
> On 06/15/2014 07:35 AM, Rich Felker wrote:
> >
> > Arguably, it was a mistake for the kernel to expose a virtual ELF to
> > begin with, and it should just have exposed a "lookup function by
> > name" operation to begin with. Yes this can be done in userspace, but
> > I see it more as a matter of "fixing a broken API design".
> >
>
> What the fsck are you smoking? There is immense value in providing a
> stable and very well-defined data structure,

I believe the widespread consensus is that there's more value in
providing a well-defined interface and keeping your data structures
opaque. Even if it's useful for the data structure to be there for the
debugger's sake (debuggers have to know about binary formats, etc.)
that's not an excuse for components of the system which have nothing
to do with ELF to have to know about it.

> which also happens to be
> what dynamic linkers already want to consume. Providing a helper for
> crippled libc applications has potential value. Shaving a few hundred
> bytes off static applications is a very weak argument, simply because it
> is such a small fraction of the enormous cost of a static application,

No, it's not. It could very easily be 5% of the binary size for plenty
of utilities that are useful to make static. Anyway it's not that a
single ~450 byte function is such a cost for static binaries in
itself; it's that if one is not vigilant against this kind of creep,
it ends up not being one ~450 byte function but quickly 5, 10, 20,
etc. of them, and eventually you get to the point (see: glibc) where
static linking is useless.

> and static applications are problematic in a number of other ways,
> especially the lack of ability to fix bugs.

This is a standard fallacy I won't address here because it's
off-topic.

> Treating the kernel as an ersatz dynamic library for "static"
> applications is kind of silly -- after all, why not provide an entire
> libc in the vdso? I have actually seen people advocate for doing that.

I consider that a very bad idea. What I think vdso should provide is
just fast versions of syscalls that can be performed without entering
kernelspace, but for which it's impossible to write such an
implementation in userspace because it (a) has to interact with
kernel-provided data structures that should not be public APIs, and
(b) requires hardware-specific versions of the function. However, I
think there's a value in providing access to these syscall
replacements in an easy-to-use manner that doesn't require that
components which have nothing to do with ELF (i.e. the time functions)
be aware of ELF data structures. And this is why I support the
proposal.

Rich

2014-06-16 02:36:10

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym


At least versioning is always useful to have, just to have
another tool for compatibility if changes are needed.

Not sure about weak, but it can't hurt. It seems to be standard practice
in glibc.

I haven't looked into this in detail, but my initial assumption would
be that it wouldn't be useful to add new vdso interfaces just for Go.
After all would you really want to force ever Go user to upgrade their
kernel just to get fast fime? So it has to work with whatever is already
there anyways.

I agree that current interface is not great (I didn't design it BTW,
just adapted it from i386), but it should be implementable.

-Andi
--
[email protected] -- Speaking for myself only.

2014-06-16 03:50:31

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Mon, Jun 16, 2014 at 04:36:04AM +0200, Andi Kleen wrote:
>
> At least versioning is always useful to have, just to have
> another tool for compatibility if changes are needed.

I disagree. Just like syscalls, vdso functions with new APIs/ABIs
should have new names. For example if a version of clock_gettime for a
new userspace with 64-bit time_t is added, it should be called
clock_gettime64 (with a corresponding __NR_clock_gettime64), not
offered as a versioned symbol over top of the old one. The versioned
symbol approach is a lot more confusing and as far as I can tell
offers no benefits.

Rich

2014-06-16 06:40:13

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 11:22:48AM -0700, Andy Lutomirski wrote:
> >>[1] The only comprehensible description of the GNU hash extension that
> >>I could find is on Oracle's blog (!)
> >>
> >
> > Curious about this blog. We do have a GNU hash implementation in Syslinux, too, for another reference.
> >
>
> https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
>
> FWIW, I bet that __vdso_findsym could be smaller if it used the GNU
> hash. Maybe it would save about the same amount of space that turning
> on the GNU hash would take up.

How so? My implementation of gnu hash lookup in musl's dynamic linker
is somewhat larger than the sysv hash lookup, and that's even with
skipping the bloom filter (which did not seem to yield any benefit,
though we may revisit this issue later). The actual hash function is
_slightly_ smaller for gnu, but the lookup function has a lot more
work to do.

Of course for the size of vdso's symbol table, a pure linear search
with no hash table would suffice...

Rich

2014-06-16 14:08:53

by Ian Lance Taylor

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Sun, Jun 15, 2014 at 7:36 PM, Andi Kleen <[email protected]> wrote:
>
> I haven't looked into this in detail, but my initial assumption would
> be that it wouldn't be useful to add new vdso interfaces just for Go.
> After all would you really want to force ever Go user to upgrade their
> kernel just to get fast fime? So it has to work with whatever is already
> there anyways.

Go works fine with the current interface. There was, arguably, a bug
in Go's old implementation in that it assumed that the vDSO would have
a normal symbol table. That bug has already been fixed.

I think this issue started when some of the Go developers questioned
why the kernel needed to provide a very complex interface--parsing an
ELF shared shared library--for very simple functionality--looking up
the address of a magic function. This approach has required special
support not just in Go, but also in the dynamic linker and gdb, and
does not work well for statically linked binaries. The support in gdb
is perhaps a good idea, but elsewhere it does not make sense.

So why not provide a simple interface?

Ian

2014-06-16 14:38:07

by Andi Kleen

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

> I think this issue started when some of the Go developers questioned
> why the kernel needed to provide a very complex interface--parsing an
> ELF shared shared library--for very simple functionality--looking up
> the address of a magic function. This approach has required special
> support not just in Go, but also in the dynamic linker and gdb, and
> does not work well for statically linked binaries. The support in gdb
> is perhaps a good idea, but elsewhere it does not make sense.
>
> So why not provide a simple interface?

What good would it do now that everyone already supports it?

The proposal is 10+ years too late.

-andi

2014-06-16 14:57:38

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Mon, Jun 16, 2014 at 07:08:50AM -0700, Ian Lance Taylor wrote:
> On Sun, Jun 15, 2014 at 7:36 PM, Andi Kleen <[email protected]> wrote:
> >
> > I haven't looked into this in detail, but my initial assumption would
> > be that it wouldn't be useful to add new vdso interfaces just for Go.
> > After all would you really want to force ever Go user to upgrade their
> > kernel just to get fast fime? So it has to work with whatever is already
> > there anyways.
>
> Go works fine with the current interface. There was, arguably, a bug
> in Go's old implementation in that it assumed that the vDSO would have
> a normal symbol table. That bug has already been fixed.
>
> I think this issue started when some of the Go developers questioned
> why the kernel needed to provide a very complex interface--parsing an
> ELF shared shared library--for very simple functionality--looking up
> the address of a magic function. This approach has required special
> support not just in Go, but also in the dynamic linker and gdb, and
> does not work well for statically linked binaries. The support in gdb
> is perhaps a good idea, but elsewhere it does not make sense.
>
> So why not provide a simple interface?

This is my sentiment as well, but I think you've stated it better than
I have.

Also note that, if the vdso is providing a "lookup the address of a
magic function by name" function, it need not be a full ELF parser,
i.e. it doesn't need to be able to deal with any ELF files except the
vdso provided by the current kernel. This probably allows the parser
to be considerably smaller; for example, all the version stuff can be
omitted unless the actual kernel in use provides multiple symbol
versions.

Rich

2014-06-16 14:59:47

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Mon, Jun 16, 2014 at 04:38:01PM +0200, Andi Kleen wrote:
> > I think this issue started when some of the Go developers questioned
> > why the kernel needed to provide a very complex interface--parsing an
> > ELF shared shared library--for very simple functionality--looking up
> > the address of a magic function. This approach has required special
> > support not just in Go, but also in the dynamic linker and gdb, and
> > does not work well for statically linked binaries. The support in gdb
> > is perhaps a good idea, but elsewhere it does not make sense.
> >
> > So why not provide a simple interface?
>
> What good would it do now that everyone already supports it?
>
> The proposal is 10+ years too late.

I addressed that in one of my first emails to this thread. I don't
think adding it now is immediately useful, but it will be very nice to
have in a few years when optimizing for kernels from 2014 and earlier
is no longer a priority. I would expect new users of vdso at that time
not to bother with the ELF parsing ugliness and simply to use the
lookup interface. It's not like they'd be losing any functionality on
old kernels, just some performance due to falling back to the syscall.

Rich

2014-06-16 15:31:43

by Ian Lance Taylor

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Mon, Jun 16, 2014 at 7:38 AM, Andi Kleen <[email protected]> wrote:
>> I think this issue started when some of the Go developers questioned
>> why the kernel needed to provide a very complex interface--parsing an
>> ELF shared shared library--for very simple functionality--looking up
>> the address of a magic function. This approach has required special
>> support not just in Go, but also in the dynamic linker and gdb, and
>> does not work well for statically linked binaries. The support in gdb
>> is perhaps a good idea, but elsewhere it does not make sense.
>>
>> So why not provide a simple interface?
>
> What good would it do now that everyone already supports it?

Do statically linked binaries use the vDSO calls?

Ian

2014-06-16 15:43:44

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Mon, Jun 16, 2014 at 08:31:39AM -0700, Ian Lance Taylor wrote:
> On Mon, Jun 16, 2014 at 7:38 AM, Andi Kleen <[email protected]> wrote:
> >> I think this issue started when some of the Go developers questioned
> >> why the kernel needed to provide a very complex interface--parsing an
> >> ELF shared shared library--for very simple functionality--looking up
> >> the address of a magic function. This approach has required special
> >> support not just in Go, but also in the dynamic linker and gdb, and
> >> does not work well for statically linked binaries. The support in gdb
> >> is perhaps a good idea, but elsewhere it does not make sense.
> >>
> >> So why not provide a simple interface?
> >
> > What good would it do now that everyone already supports it?
>
> Do statically linked binaries use the vDSO calls?

Under glibc, I believe so (not checked). Under musl, yes, and even in
the dynamic-linked case we use the same code that's used for static
linking rather than trying to get the dynamic linker to do them
correctly. I still have some cruft lying around from where (in the
past) we tried to do it via the dynamic linker, but I'm probably going
to remove that and make the vdso behave as RTLD_LOCAL so that there's
no risk of weird symbols it exports interfering with the application
(applications could still make it global via an explicit dlopen). The
only reason for keeping it around at all in the dynamic linker is for
the sake of gdb.

Rich

2014-06-20 15:55:46

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Mon, Jun 16, 2014 at 8:42 AM, Rich Felker <[email protected]> wrote:
> On Mon, Jun 16, 2014 at 08:31:39AM -0700, Ian Lance Taylor wrote:
>> On Mon, Jun 16, 2014 at 7:38 AM, Andi Kleen <[email protected]> wrote:
>> >> I think this issue started when some of the Go developers questioned
>> >> why the kernel needed to provide a very complex interface--parsing an
>> >> ELF shared shared library--for very simple functionality--looking up
>> >> the address of a magic function. This approach has required special
>> >> support not just in Go, but also in the dynamic linker and gdb, and
>> >> does not work well for statically linked binaries. The support in gdb
>> >> is perhaps a good idea, but elsewhere it does not make sense.
>> >>
>> >> So why not provide a simple interface?
>> >
>> > What good would it do now that everyone already supports it?
>>
>> Do statically linked binaries use the vDSO calls?
>
> Under glibc, I believe so (not checked). Under musl, yes, and even in
> the dynamic-linked case we use the same code that's used for static
> linking rather than trying to get the dynamic linker to do them
> correctly. I still have some cruft lying around from where (in the
> past) we tried to do it via the dynamic linker, but I'm probably going
> to remove that and make the vdso behave as RTLD_LOCAL so that there's
> no risk of weird symbols it exports interfering with the application
> (applications could still make it global via an explicit dlopen). The
> only reason for keeping it around at all in the dynamic linker is for
> the sake of gdb.
>

What about backtrace_symbols, dl_addr, and the unwinder (e.g.
siglongjmp)? It would be nice to wean the vdso off of frame pointers
some day.

--Andy

2014-06-20 16:08:33

by Rich Felker

[permalink] [raw]
Subject: Re: [RFC 0/2] __vdso_findsym

On Fri, Jun 20, 2014 at 08:55:01AM -0700, Andy Lutomirski wrote:
> On Mon, Jun 16, 2014 at 8:42 AM, Rich Felker <[email protected]> wrote:
> > On Mon, Jun 16, 2014 at 08:31:39AM -0700, Ian Lance Taylor wrote:
> >> On Mon, Jun 16, 2014 at 7:38 AM, Andi Kleen <[email protected]> wrote:
> >> >> I think this issue started when some of the Go developers questioned
> >> >> why the kernel needed to provide a very complex interface--parsing an
> >> >> ELF shared shared library--for very simple functionality--looking up
> >> >> the address of a magic function. This approach has required special
> >> >> support not just in Go, but also in the dynamic linker and gdb, and
> >> >> does not work well for statically linked binaries. The support in gdb
> >> >> is perhaps a good idea, but elsewhere it does not make sense.
> >> >>
> >> >> So why not provide a simple interface?
> >> >
> >> > What good would it do now that everyone already supports it?
> >>
> >> Do statically linked binaries use the vDSO calls?
> >
> > Under glibc, I believe so (not checked). Under musl, yes, and even in
> > the dynamic-linked case we use the same code that's used for static
> > linking rather than trying to get the dynamic linker to do them
> > correctly. I still have some cruft lying around from where (in the
> > past) we tried to do it via the dynamic linker, but I'm probably going
> > to remove that and make the vdso behave as RTLD_LOCAL so that there's
> > no risk of weird symbols it exports interfering with the application
> > (applications could still make it global via an explicit dlopen). The
> > only reason for keeping it around at all in the dynamic linker is for
> > the sake of gdb.
>
> What about backtrace_symbols, dl_addr, and the unwinder (e.g.
> siglongjmp)? It would be nice to wean the vdso off of frame pointers
> some day.

My comments were in the context of musl, which doesn't use unwinding
internally whatsoever, but if you're asking about the usefulness of
being able to see the vdso's elf/dwarf2/etc. stuff for various
outside-of-libc purposes, then yes. My statement "for the sake of gdb"
was just a poor substitute for this concept (debugging/introspective
use in general).

Rich