There is no requirement to update the TSS I/O bitmap when a thread using it is
scheduled out and the incoming thread does not use it.
For the permission check based on the TSS I/O bitmap the CPU calculates the memory
location of the I/O bitmap by the address of the TSS and the io_bitmap_base member
of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the
offset to an address outside of the TSS limit.
If an I/O instruction is issued from user space the TSS limit causes #GP to be
raised in the same way as a valid I/O bitmap with all bits set to 1 would do.
This removes the extra work when an I/O bitmap using task is scheduled out
and puts the burden on the rare I/O bitmap users when they are scheduled
in.
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/include/asm/processor.h | 38 +++++++++++++++++--------
arch/x86/kernel/cpu/common.c | 3 +
arch/x86/kernel/doublefault.c | 2 -
arch/x86/kernel/process.c | 59 +++++++++++++++++++++------------------
4 files changed, 61 insertions(+), 41 deletions(-)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -330,8 +330,23 @@ struct x86_hw_tss {
#define IO_BITMAP_BITS 65536
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
-#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+#define IO_BITMAP_OFFSET_VALID \
+ (offsetof(struct tss_struct, io_bitmap) - \
+ offsetof(struct tss_struct, x86_tss))
+
+/*
+ * sizeof(unsigned long) coming from an extra "long" at the end
+ * of the iobitmap.
+ *
+ * -1? seg base+limit should be pointing to the address of the
+ * last valid byte
+ */
+#define __KERNEL_TSS_LIMIT \
+ (IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
+
+/* Base offset outside of TSS_LIMIT so unprivileged IO causes #GP */
+#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
struct entry_stack {
unsigned long words[64];
@@ -350,6 +365,15 @@ struct tss_struct {
struct x86_hw_tss x86_tss;
/*
+ * Store the dirty size of the last io bitmap offender. The next
+ * one will have to do the cleanup as the switch out to a non
+ * io bitmap user will just set x86_tss.io_bitmap_base to a value
+ * outside of the TSS limit. So for sane tasks there is no need
+ * to actually touch the io_bitmap at all.
+ */
+ unsigned int io_bitmap_prev_max;
+
+ /*
* The extra 1 is there because the CPU will access an
* additional byte beyond the end of the IO permission
* bitmap. The extra byte must be all 1 bits, and must
@@ -360,16 +384,6 @@ struct tss_struct {
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
-/*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap.
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
-#define __KERNEL_TSS_LIMIT \
- (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
-
/* Per CPU interrupt stacks */
struct irq_stack {
char stack[IRQ_STACK_SIZE];
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1863,7 +1863,8 @@ void cpu_init(void)
/* Initialize the TSS. */
tss_setup_ist(tss);
- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+ tss->io_bitmap_prev_max = 0;
memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap));
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cach
.sp0 = STACK_START,
.ss0 = __KERNEL_DS,
.ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+ .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
.ip = (unsigned long) doublefault_fn,
/* 0x2 bit is always set */
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -72,18 +72,9 @@
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
#endif
+ .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
},
-#ifdef CONFIG_X86_32
- /*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
- .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
-#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
@@ -112,18 +103,18 @@ void exit_thread(struct task_struct *tsk
struct thread_struct *t = &tsk->thread;
unsigned long *bp = t->io_bitmap_ptr;
struct fpu *fpu = &t->fpu;
+ struct tss_struct *tss;
if (bp) {
- struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
+ preempt_disable();
+ tss = this_cpu_ptr(&cpu_tss_rw);
t->io_bitmap_ptr = NULL;
- clear_thread_flag(TIF_IO_BITMAP);
- /*
- * Careful, clear this in the TSS too:
- */
- memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
t->io_bitmap_max = 0;
- put_cpu();
+ clear_thread_flag(TIF_IO_BITMAP);
+ /* Invalidate the io bitmap base in the TSS */
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+ preempt_enable();
kfree(bp);
}
@@ -363,29 +354,43 @@ void arch_setup_new_exec(void)
}
}
-static inline void switch_to_bitmap(struct thread_struct *prev,
- struct thread_struct *next,
+static inline void switch_to_bitmap(struct thread_struct *next,
unsigned long tifp, unsigned long tifn)
{
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
if (tifn & _TIF_IO_BITMAP) {
/*
- * Copy the relevant range of the IO bitmap.
- * Normally this is 128 bytes or less:
+ * Copy at least the size of the incoming tasks bitmap
+ * which covers the last permitted I/O port.
+ *
+ * If the previous task which used an io bitmap had more
+ * bits permitted, then the copy needs to cover those as
+ * well so they get turned off.
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
- max(prev->io_bitmap_max, next->io_bitmap_max));
+ max(tss->io_bitmap_prev_max, next->io_bitmap_max));
+
+ /* Store the new max and set io_bitmap_base valid */
+ tss->io_bitmap_prev_max = next->io_bitmap_max;
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID;
+
/*
- * Make sure that the TSS limit is correct for the CPU
- * to notice the IO bitmap.
+ * Make sure that the TSS limit is covering the io bitmap.
+ * It might have been cut down by a VMEXIT to 0x67 which
+ * would cause a subsequent I/O access from user space to
+ * trigger a #GP because the bitmap is outside the TSS
+ * limit.
*/
refresh_tss_limit();
} else if (tifp & _TIF_IO_BITMAP) {
/*
- * Clear any possible leftover bits:
+ * Do not touch the bitmap. Let the next bitmap using task
+ * deal with the mess. Just make the io_bitmap_base invalid
+ * by moving it outside the TSS limit so any subsequent I/O
+ * access from user space will trigger a #GP.
*/
- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
}
}
@@ -599,7 +604,7 @@ void __switch_to_xtra(struct task_struct
tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
- switch_to_bitmap(prev, next, tifp, tifn);
+ switch_to_bitmap(next, tifp, tifn);
propagate_user_return_notify(prev_p, next_p);
On Wed, Nov 06, 2019 at 08:35:03PM +0100, Thomas Gleixner wrote:
> There is no requirement to update the TSS I/O bitmap when a thread using it is
> scheduled out and the incoming thread does not use it.
>
> For the permission check based on the TSS I/O bitmap the CPU calculates the memory
> location of the I/O bitmap by the address of the TSS and the io_bitmap_base member
> of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the
> offset to an address outside of the TSS limit.
>
> If an I/O instruction is issued from user space the TSS limit causes #GP to be
> raised in the same was as valid I/O bitmap with all bits set to 1 would do.
>
> This removes the extra work when an I/O bitmap using task is scheduled out
> and puts the burden on the rare I/O bitmap users when they are scheduled
> in.
This also aligns nicely with the fact that context switch time is accounted
to the next task. So doing the expensive part on switch-in gets it all
accounted to the task that caused it.
On Thu, 7 Nov 2019, Peter Zijlstra wrote:
> On Wed, Nov 06, 2019 at 08:35:03PM +0100, Thomas Gleixner wrote:
> > There is no requirement to update the TSS I/O bitmap when a thread using it is
> > scheduled out and the incoming thread does not use it.
> >
> > For the permission check based on the TSS I/O bitmap the CPU calculates the memory
> > location of the I/O bitmap by the address of the TSS and the io_bitmap_base member
> > of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the
> > offset to an address outside of the TSS limit.
> >
> > If an I/O instruction is issued from user space the TSS limit causes #GP to be
> > raised in the same was as valid I/O bitmap with all bits set to 1 would do.
> >
> > This removes the extra work when an I/O bitmap using task is scheduled out
> > and puts the burden on the rare I/O bitmap users when they are scheduled
> > in.
>
> This also nicely aligns with that the context switch time is accounted
> to the next task. So by doing the expensive part on switch-in gets it
> all accounted to the task that caused it.
Just that I can't add the storage to tss_struct due to the VMX insanity of
setting TSS limit hard to 0x67 on vmexit instead of restoring the host
value.
Did I already say that virt creates more problems than it solves?
Thanks,
tglx
On Thu, 7 Nov 2019, Thomas Gleixner wrote:
> On Thu, 7 Nov 2019, Peter Zijlstra wrote:
> > On Wed, Nov 06, 2019 at 08:35:03PM +0100, Thomas Gleixner wrote:
> > > There is no requirement to update the TSS I/O bitmap when a thread using it is
> > > scheduled out and the incoming thread does not use it.
> > >
> > > For the permission check based on the TSS I/O bitmap the CPU calculates the memory
> > > location of the I/O bitmap by the address of the TSS and the io_bitmap_base member
> > > of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the
> > > offset to an address outside of the TSS limit.
> > >
> > > If an I/O instruction is issued from user space the TSS limit causes #GP to be
> > > raised in the same was as valid I/O bitmap with all bits set to 1 would do.
> > >
> > > This removes the extra work when an I/O bitmap using task is scheduled out
> > > and puts the burden on the rare I/O bitmap users when they are scheduled
> > > in.
> >
> > This also nicely aligns with that the context switch time is accounted
> > to the next task. So by doing the expensive part on switch-in gets it
> > all accounted to the task that caused it.
>
> Just that I can't add the storage to tss_struct due to the VMX insanity of
> setting TSS limit hard to 0x67 on vmexit instead of restoring the host
> value.
Well, I can. The build bugon in vmx.c is just bogus.
> Did I say that already that virt creates more problems than it solves?
Still stands.
tglx
On 11/7/19 6:08 AM, Thomas Gleixner wrote:
> On Thu, 7 Nov 2019, Thomas Gleixner wrote:
>> On Thu, 7 Nov 2019, Peter Zijlstra wrote:
>>> On Wed, Nov 06, 2019 at 08:35:03PM +0100, Thomas Gleixner wrote:
>>>> There is no requirement to update the TSS I/O bitmap when a thread using it is
>>>> scheduled out and the incoming thread does not use it.
>>>>
>>>> For the permission check based on the TSS I/O bitmap the CPU calculates the memory
>>>> location of the I/O bitmap by the address of the TSS and the io_bitmap_base member
>>>> of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the
>>>> offset to an address outside of the TSS limit.
>>>>
>>>> If an I/O instruction is issued from user space the TSS limit causes #GP to be
>>>> raised in the same was as valid I/O bitmap with all bits set to 1 would do.
>>>>
>>>> This removes the extra work when an I/O bitmap using task is scheduled out
>>>> and puts the burden on the rare I/O bitmap users when they are scheduled
>>>> in.
>>>
>>> This also nicely aligns with that the context switch time is accounted
>>> to the next task. So by doing the expensive part on switch-in gets it
>>> all accounted to the task that caused it.
>>
>> Just that I can't add the storage to tss_struct due to the VMX insanity of
>> setting TSS limit hard to 0x67 on vmexit instead of restoring the host
>> value.
>
> Well, I can. The build bugon in vmx.c is just bogus.
SDM vol 3 27.5.2 says the BUILD_BUG_ON is right. Or am I
misunderstanding you?
I'm reasonably confident that the TSS limit is indeed 0x67 after VM
exit, and I wrote the existing code that tries to optimize this to avoid
LTR when not needed.
On Fri, 8 Nov 2019, Andy Lutomirski wrote:
> On 11/7/19 6:08 AM, Thomas Gleixner wrote:
> > On Thu, 7 Nov 2019, Thomas Gleixner wrote:
> >> Just that I can't add the storage to tss_struct due to the VMX insanity of
> >> setting TSS limit hard to 0x67 on vmexit instead of restoring the host
> >> value.
> >
> > Well, I can. The build bugon in vmx.c is just bogus.
>
> SDM vol 3 27.5.2 says the BUILD_BUG_ON is right. Or am I
> misunderstanding you?
>
> I'm reasonably confident that the TSS limit is indeed 0x67 after VM
> exit, and I wrote the existing code that tries to optimize this to avoid
> LTR when not needed.
The BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 == 0x67) in the VMX code is bogus in
two aspects:
1) This wants to be in generic x86 code
2) The IO_BITMAP_OFFSET is not the right thing to check because it makes
assumptions about the layout of tss_struct. Nothing requires that the
I/O bitmap is placed right after x86_tss, which is the hardware mandated
tss structure. It pointlessly makes restrictions on the struct
tss_struct layout.
The proper thing to check is:
- Offset of x86_tss in tss_struct is 0
- Size of x86_tss == 0x68
We already have the page alignment sanity check of the TSS in
cpu_entry_area.c. That's where this should have gone in the first
place.
Thanks,
tglx
On 11/6/19 11:35 AM, Thomas Gleixner wrote:
> There is no requirement to update the TSS I/O bitmap when a thread using it is
> scheduled out and the incoming thread does not use it.
>
> For the permission check based on the TSS I/O bitmap the CPU calculates the memory
> location of the I/O bitmap by the address of the TSS and the io_bitmap_base member
> of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the
> offset to an address outside of the TSS limit.
>
> If an I/O instruction is issued from user space the TSS limit causes #GP to be
> raised in the same was as valid I/O bitmap with all bits set to 1 would do.
>
> This removes the extra work when an I/O bitmap using task is scheduled out
> and puts the burden on the rare I/O bitmap users when they are scheduled
> in.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> arch/x86/include/asm/processor.h | 38 +++++++++++++++++--------
> arch/x86/kernel/cpu/common.c | 3 +
> arch/x86/kernel/doublefault.c | 2 -
> arch/x86/kernel/process.c | 59 +++++++++++++++++++++------------------
> 4 files changed, 61 insertions(+), 41 deletions(-)
>
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -330,8 +330,23 @@ struct x86_hw_tss {
> #define IO_BITMAP_BITS 65536
> #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
> #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
> -#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
> -#define INVALID_IO_BITMAP_OFFSET 0x8000
> +
> +#define IO_BITMAP_OFFSET_VALID \
> + (offsetof(struct tss_struct, io_bitmap) - \
> + offsetof(struct tss_struct, x86_tss))
> +
> +/*
> + * sizeof(unsigned long) coming from an extra "long" at the end
> + * of the iobitmap.
> + *
> + * -1? seg base+limit should be pointing to the address of the
> + * last valid byte
What's with the '?'
> + */
> +#define __KERNEL_TSS_LIMIT \
> + (IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
> +
> +/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
> +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
>
> struct entry_stack {
> unsigned long words[64];
> @@ -350,6 +365,15 @@ struct tss_struct {
> struct x86_hw_tss x86_tss;
>
> /*
> + * Store the dirty size of the last io bitmap offender. The next
> + * one will have to do the cleanup as the switch out to a non
> + * io bitmap user will just set x86_tss.io_bitmap_base to a value
> + * outside of the TSS limit. So for sane tasks there is no need
> + * to actually touch the io_bitmap at all.
> + */
> + unsigned int io_bitmap_prev_max;
Hmm. I'm wondering if a clearer way to say this would be:
io_bitmap_max_allowed_byte: offset from the start of the io bitmap to
the first byte that might contain a zero bit. If we switch from a task
that uses ioperm() to one that does not, we will invalidate the io
bitmap by changing the offset. The next task that starts using the io
bitmap again will need to make sure it updates all the bytes through
io_bitmap_max_allowed_byte.
But your description is okay, too.
It's worth noting that, due to Meltdown, this patch leaks the io bitmap
of io bitmap-using tasks. I'm not sure I care.
> +
> + /*
> * The extra 1 is there because the CPU will access an
> * additional byte beyond the end of the IO permission
> * bitmap. The extra byte must be all 1 bits, and must
> @@ -360,16 +384,6 @@ struct tss_struct {
>
> DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
>
> -/*
> - * sizeof(unsigned long) coming from an extra "long" at the end
> - * of the iobitmap.
> - *
> - * -1? seg base+limit should be pointing to the address of the
> - * last valid byte
> - */
> -#define __KERNEL_TSS_LIMIT \
> - (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
> -
> /* Per CPU interrupt stacks */
> struct irq_stack {
> char stack[IRQ_STACK_SIZE];
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1863,7 +1863,8 @@ void cpu_init(void)
>
> /* Initialize the TSS. */
> tss_setup_ist(tss);
> - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
> + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
> + tss->io_bitmap_prev_max = 0;
> memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap));
> set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
>
> --- a/arch/x86/kernel/doublefault.c
> +++ b/arch/x86/kernel/doublefault.c
> @@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cach
> .sp0 = STACK_START,
> .ss0 = __KERNEL_DS,
> .ldt = 0,
> - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
> + .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
>
> .ip = (unsigned long) doublefault_fn,
> /* 0x2 bit is always set */
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -72,18 +72,9 @@
> #ifdef CONFIG_X86_32
> .ss0 = __KERNEL_DS,
> .ss1 = __KERNEL_CS,
> - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
> #endif
> + .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
> },
> -#ifdef CONFIG_X86_32
> - /*
> - * Note that the .io_bitmap member must be extra-big. This is because
> - * the CPU will access an additional byte beyond the end of the IO
> - * permission bitmap. The extra byte must be all 1 bits, and must
> - * be within the limit.
> - */
> - .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
> -#endif
> };
> EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
>
> @@ -112,18 +103,18 @@ void exit_thread(struct task_struct *tsk
> struct thread_struct *t = &tsk->thread;
> unsigned long *bp = t->io_bitmap_ptr;
> struct fpu *fpu = &t->fpu;
> + struct tss_struct *tss;
>
> if (bp) {
> - struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
> + preempt_disable();
> + tss = this_cpu_ptr(&cpu_tss_rw);
>
> t->io_bitmap_ptr = NULL;
> - clear_thread_flag(TIF_IO_BITMAP);
> - /*
> - * Careful, clear this in the TSS too:
> - */
> - memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
> t->io_bitmap_max = 0;
> - put_cpu();
> + clear_thread_flag(TIF_IO_BITMAP);
> + /* Invalidate the io bitmap base in the TSS */
> + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
The first time I read this, I thought this code was all unnecessary.
But now I think I understand. How about a comment:
/*
* We're about to free the IO bitmap. We need to disable it in the TSS
* too so that switch_to() doesn't see an inconsistent state.
*/
In any case:
Acked-by: Andy Lutomirski <[email protected]>
> On Nov 8, 2019, at 3:45 PM, Thomas Gleixner <[email protected]> wrote:
>
> On Fri, 8 Nov 2019, Andy Lutomirski wrote:
>>> On 11/7/19 6:08 AM, Thomas Gleixner wrote:
>>> On Thu, 7 Nov 2019, Thomas Gleixner wrote:
>>>> Just that I can't add the storage to tss_struct due to the VMX insanity of
>>>> setting TSS limit hard to 0x67 on vmexit instead of restoring the host
>>>> value.
>>>
>>> Well, I can. The build bugon in vmx.c is just bogus.
>>
>> SDM vol 3 27.5.2 says the BUILD_BUG_ON is right. Or am I
>> misunderstanding you?
>>
>> I'm reasonably confident that the TSS limit is indeed 0x67 after VM
>> exit, and I wrote the existing code that tries to optimize this to avoid
>> LTR when not needed.
>
> The BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 == 0x67) in the VMX code is bogus in
> two aspects:
>
> 1) This wants to be in generic x86 code
I think I disagree. The only thing special about 0x67 is that VMX hard codes it. It’s specifically a VMX-ism. So I think the VMX code should indeed assert that 0x67 is a safe value.
>
> 2) The IO_BITMAP_OFFSET is not the right thing to check because it makes
> asssumptions about the layout of tss_struct. Nothing requires that the
> I/O bitmap is placed right after x86_tss, which is the hardware mandated
> tss structure. It pointlessly makes restrictions on the struct
> tss_struct layout.
I agree with this.
>
> The proper thing to check is:
>
> - Offset of x86_tss in tss_struct is 0
> - Size of x86_tss == 0x68
>
> We already have the page alignment sanity check off TSS in
> cpu_entry_area.c. That's where this should have gone into in the first
> place.
>
> Thanks,
>
> tglx
On Fri, 8 Nov 2019, Andy Lutomirski wrote:
> > On Nov 8, 2019, at 3:45 PM, Thomas Gleixner <[email protected]> wrote:
> > On Fri, 8 Nov 2019, Andy Lutomirski wrote:
> >> SDM vol 3 27.5.2 says the BUILD_BUG_ON is right. Or am I
> >> misunderstanding you?
> >>
> >> I'm reasonably confident that the TSS limit is indeed 0x67 after VM
> >> exit, and I wrote the existing code that tries to optimize this to avoid
> >> LTR when not needed.
> >
> > The BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 == 0x67) in the VMX code is bogus in
> > two aspects:
> >
> > 1) This wants to be in generic x86 code
>
> I think disagree. The only thing special about 0x67 is that VMX hard
> codes it. It’s specifically a VMX-ism. So I think the VMX code should
> indeed assert that 0x67 is a safe value.
Yes, it is a VMX specific issue, but I really prefer the build to fail in
the common code without having to enable VMX if something changes the
TSS layout in incompatible ways.
Thanks,
tglx