For simple user space timing it's very useful to be able to do
#include <asm/msr.h> instead of having to cut'n'paste the necessary
macros into all programs. This used to work with older kernels,
but broke with the paravirt support (I ended up with a bunch of
test programs that broke because of that). But it's very useful
to have rdtscll() available somewhere in user space and asm/msr.h
is a good place to have them.
Provide simple rdtscl()/rdtscll() macros for user space too in asm/msr.h.
Since they are very simple I don't think they are a burden to maintain.
The diff looks bigger than it is because I moved a code block
and diff doesn't handle it very well.
Signed-off-by: Andi Kleen <[email protected]>
Index: linux-2.6.27-rc4-misc/include/asm-x86/msr.h
===================================================================
--- linux-2.6.27-rc4-misc.orig/include/asm-x86/msr.h
+++ linux-2.6.27-rc4-misc/include/asm-x86/msr.h
@@ -4,23 +4,6 @@
#include <asm/msr-index.h>
#ifndef __ASSEMBLY__
-# include <linux/types.h>
-#endif
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-#include <asm/asm.h>
-#include <asm/errno.h>
-
-static inline unsigned long long native_read_tscp(unsigned int *aux)
-{
- unsigned long low, high;
- asm volatile(".byte 0x0f,0x01,0xf9"
- : "=a" (low), "=d" (high), "=c" (*aux));
- return low | ((u64)high << 32);
-}
-
/*
* i386 calling convention returns 64-bit value in edx:eax, while
* x86_64 returns at rax. Also, the "A" constraint does not really
@@ -29,7 +12,7 @@ static inline unsigned long long native_
*/
#ifdef CONFIG_X86_64
#define DECLARE_ARGS(val, low, high) unsigned low, high
-#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32))
+#define EAX_EDX_VAL(val, low, high) ((low) | ((__u64)(high) << 32))
#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
#else
@@ -39,6 +22,23 @@ static inline unsigned long long native_
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
+#endif
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/asm.h>
+#include <asm/errno.h>
+
+static inline unsigned long long native_read_tscp(unsigned int *aux)
+{
+ unsigned long low, high;
+ asm volatile(".byte 0x0f,0x01,0xf9"
+ : "=a" (low), "=d" (high), "=c" (*aux));
+ return low | ((u64)high << 32);
+}
+
static inline unsigned long long native_read_msr(unsigned int msr)
{
DECLARE_ARGS(val, low, high);
@@ -217,6 +217,24 @@ static inline int wrmsr_safe_on_cpu(unsi
}
#endif /* CONFIG_SMP */
#endif /* __ASSEMBLY__ */
+
+#else
+
+/*
+ * Provide simple rdtsc macros for non kernel users. These differ
+ * from the kernel versions by not having barriers and not supporting
+ * paravirtualization.
+ */
+static __always_inline unsigned long long __read_tsc(void)
+{
+ DECLARE_ARGS(val, low, high);
+ asm volatile("rdtsc" : EAX_EDX_RET(val, low, high));
+ return EAX_EDX_VAL(val, low, high);
+}
+
+#define rdtscl(low) ({ asm volatile("rdtsc" : "=a" (low) :: "d"); })
+#define rdtscll(val) ((val) = __read_tsc())
+
#endif /* __KERNEL__ */
rdtscp was the only function not to use the DECLARE_ARGS/EAX_EDX_* macros; convert native_read_tscp() to use them.
Signed-off-by: Andi Kleen <[email protected]>
Index: linux-2.6.27-rc4-misc/include/asm-x86/msr.h
===================================================================
--- linux-2.6.27-rc4-misc.orig/include/asm-x86/msr.h
+++ linux-2.6.27-rc4-misc/include/asm-x86/msr.h
@@ -34,10 +34,10 @@
static inline unsigned long long native_read_tscp(unsigned int *aux)
{
- unsigned long low, high;
- asm volatile(".byte 0x0f,0x01,0xf9"
- : "=a" (low), "=d" (high), "=c" (*aux));
- return low | ((u64)high << 32);
+ DECLARE_ARGS(val, low, high);
+ asm volatile(".byte 0x0f,0x01,0xf9" :
+ EAX_EDX_RET(val, low, high), "=c" (*aux));
+ return EAX_EDX_VAL(val, low, high);
}
static inline unsigned long long native_read_msr(unsigned int msr)
Signed-off-by: Andi Kleen <[email protected]>
Index: linux-2.6.27-rc4-misc/include/asm-x86/msr.h
===================================================================
--- linux-2.6.27-rc4-misc.orig/include/asm-x86/msr.h
+++ linux-2.6.27-rc4-misc/include/asm-x86/msr.h
@@ -15,12 +15,10 @@
#ifdef CONFIG_X86_64
#define DECLARE_ARGS(val, low, high) unsigned low, high
#define EAX_EDX_VAL(val, low, high) ((low) | ((__u64)(high) << 32))
-#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
#else
#define DECLARE_ARGS(val, low, high) unsigned long long val
#define EAX_EDX_VAL(val, low, high) (val)
-#define EAX_EDX_ARGS(val, low, high) "A" (val)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
Andi Kleen wrote:
> For simple user space timing it's very useful to be able to do
> #include <asm/msr.h> instead of having to cut'n'paste the necessary
> macros into all programs. This used to work with older kernels,
> but broke with the paravirt support (I ended up with a bunch of
> test programs that broke because of that). But it's very useful
> to have rdtscll() available somewhere in user space and asm/msr.h
> is a good place to have them.
>
> Provide simple rdtsc/rdtscl() macros for user space too in asm/msr.h.
> Since they are very simple I don't think they are a burden to maintain.
>
> The diff looks bigger than it is because I moved a code block
> and diff doesn't handle it very well.
I really don't think this belongs in the kernel. It's not even a case
of "usable by accident" anymore, and hasn't worked for a while, so it's
not a matter of legacy, either.
Mixing fundamentally unrelated kernel and userspace variants of the same
function just makes the aggregation uglier than both.
(Also, most userspace variants I have seen have what the kernel calls
"rdtscll" and calls it "rdtsc".)
I would suggest writing a <sys/tsc.h> header file and submitting to the
glibc people, instead, or perhaps even better, start a libarch/libx86 tree.
-hpa
On Tue, Oct 07, 2008 at 02:32:05PM -0700, H. Peter Anvin wrote:
> I really don't think this belongs in the kernel. It's not even a case
> of "usable by accident" anymore, and hasn't worked for a while, so it's
> not a matter of legacy, either.
Actually that's not true, because most distros still use relatively old kernel
includes. It really only broke with paravirt, which especially on 64bit
is extremely new.
I think only a few of the latest distros actually break it in their
default setup, that is why I looked at fixing it (ran into this
in suse 11.0, in 10.3 it was all still ok)
>
> Mixing fundamentally unrelated kernel and userspace variants of the same
> function just makes the aggregation uglier than both.
I disagree on the fundamentally unrelated. They are the same semantically
(although the paravirt entry point is misnamed, it shouldn't call
itself RDTSC)
>
> (Also, most userspace variants I have seen have what the kernel calls
> "rdtscll" and calls it "rdtsc".)
At least asm/msr.h has used it like this forever (2.4).
> I would suggest writing a <sys/tsc.h> header file and submitting to the
> glibc people, instead, or perhaps even better, start a libarch/libx86 tree.
That wouldn't work on old kernels. asm/msr.h has been the traditional
interface for this and rdtsc has worked forever (at least dating back to 2.0)
rdtscll is a bit newer, but still in Linux terms ancient (2.4)
Also in my experience distributions are extremely slow at keeping up
with glibc, so even if this was added to glibc it would be probably
years before it would be actually usable :/ And I see about zero
point in changing a perfectly fine include name breaking everything old.
Anyways if you insist I can probably deal without
but I (and likely others) would think something nasty about you every time
I have to cut'n'paste that code again. Do you really want to risk that? @)
-Andi
--
[email protected]
Andi Kleen wrote:
>
>> I would suggest writing a <sys/tsc.h> header file and submitting to the
>> glibc people, instead, or perhaps even better, start a libarch/libx86 tree.
>
> That wouldn't work on old kernels. asm/msr.h has been the traditional
> interface for this and rdtsc has worked forever (at least dating back to 2.0)
> rdtscll is a bit newer, but still in Linux terms ancient (2.4)
>
It would definitely work on older kernels. Just don't stomp on the
kernel's namespace and you can do whatever you want.
> Also in my experience distributions are extremely slow at keeping up
> with glibc, so even if this was added to glibc it would be probably
> years before it would be actually usable :/ And I see about zero
> point in changing a perfectly fine include name breaking everything old.
So ship the include file with your utility. Hardly a big deal. But
yes, that's part of why I'm mentioning the idea of putting it in a
separate library, which can also house other low-level constructs.
> Anyways if you insist I can probably deal without
> but I (and likely others) would think something nasty about you every time
> I have to cut'n'paste that code again. Do you really want to risk that? @)
If you're really incapable of maintaining a single user-mode header file
with a handful of one-liners in a sane way, then definitely.
-hpa
On Tue, Oct 07, 2008 at 04:25:00PM -0700, H. Peter Anvin wrote:
> >Anyways if you insist I can probably deal without
> >but I (and likely others) would think something nasty about you every time
> >I have to cut'n'paste that code again. Do you really want to risk that? @)
>
> If you're really incapable of maintaining a single user-mode header file
> with a handful of one-liners in a sane way, then definitely.
These are typical benchmark or test programs which I like to write with
as few dependencies as possible because they compile/run on all kinds
of systems with often dubious setup.
The other issue is that everyone will have to do this now, which just
seems wrong to me.
-Andi (who never got this strange meme which is floating around
in the kernel community for some time that cut'n'pasting interfaces
into user programs is a sane thing. To me it seems insane.)
--
[email protected]
BTW even if you cannot get yourself to DTRT on the user interface please at
least apply the followup cleanup patches in the series.
-Andi
--
[email protected]
Andi Kleen wrote:
>
> -Andi (who never got this strange meme which is floating around
> in the kernel community for some time that cut'n'pasting interfaces
> into user programs is a sane thing. To me it seems insane.)
>
Not "cutting and pasting". Librarizing.
FWIW, I started a git tree for this kind of stuff at:
git://git.kernel.org/pub/scm/libs/cpu/libx86/libx86.git
-hpa