Subject: Re: [PATCH 3/3] x86/xen: Move paravirt IOPL switching to the slow path
From: Juergen Gross
To: Brian Gerst
Cc: the arch/x86 maintainers, Linux Kernel Mailing List, Ingo Molnar,
 "H. Peter Anvin", Andy Lutomirski, Boris Ostrovsky
Date: Wed, 14 Jun 2017 19:34:42 +0200
Message-ID: <219db3ef-ab6e-ed0f-03bc-e4646e220d74@suse.com>
References: <20170614124032.4159-1-brgerst@gmail.com>
 <20170614124032.4159-4-brgerst@gmail.com>
X-Mailing-List: linux-kernel@vger.kernel.org

On 14/06/17 19:29, Brian Gerst wrote:
> On Wed, Jun 14, 2017 at 9:14 AM, Juergen Gross wrote:
>> On 14/06/17 14:40, Brian Gerst wrote:
>>> Since tasks using IOPL are very rare, move the switching code to the slow
>>> path for lower impact on normal tasks.
>>>
>>> Signed-off-by: Brian Gerst
>>> ---
>>>  arch/x86/include/asm/paravirt.h       |  6 ++++++
>>>  arch/x86/include/asm/processor.h      |  1 +
>>>  arch/x86/include/asm/thread_info.h    |  5 ++++-
>>>  arch/x86/include/asm/xen/hypervisor.h |  2 --
>>>  arch/x86/kernel/ioport.c              |  6 ++++++
>>>  arch/x86/kernel/process.c             |  8 ++++++++
>>>  arch/x86/kernel/process_32.c          |  9 ---------
>>>  arch/x86/kernel/process_64.c          | 11 -----------
>>>  arch/x86/xen/enlighten_pv.c           |  2 +-
>>>  9 files changed, 26 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
>>> index 9a15739..2145dbd 100644
>>> --- a/arch/x86/include/asm/paravirt.h
>>> +++ b/arch/x86/include/asm/paravirt.h
>>> @@ -265,6 +265,12 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
>>>  {
>>>         PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
>>>  }
>>> +
>>> +static inline bool paravirt_iopl(void)
>>> +{
>>> +       return pv_cpu_ops.set_iopl_mask != paravirt_nop;
>>> +}
>>> +
>>>  static inline void set_iopl_mask(unsigned mask)
>>>  {
>>>         PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
>>> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
>>> index 06c4795..4411d67 100644
>>> --- a/arch/x86/include/asm/processor.h
>>> +++ b/arch/x86/include/asm/processor.h
>>> @@ -523,6 +523,7 @@ static inline void load_sp0(struct tss_struct *tss,
>>>         native_load_sp0(tss, thread);
>>>  }
>>>
>>> +static inline bool paravirt_iopl(void) { return false; }
>>>  static inline void set_iopl_mask(unsigned mask) { }
>>>  #endif /* CONFIG_PARAVIRT */
>>>
>>> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
>>> index e00e1bd..350f3d3 100644
>>> --- a/arch/x86/include/asm/thread_info.h
>>> +++ b/arch/x86/include/asm/thread_info.h
>>> @@ -79,6 +79,7 @@ struct thread_info {
>>>  #define TIF_SIGPENDING          2       /* signal pending */
>>>  #define TIF_NEED_RESCHED        3       /* rescheduling necessary */
>>>  #define TIF_SINGLESTEP          4       /* reenable singlestep on user return*/
>>> +#define TIF_PV_IOPL             5       /* call hypervisor to change IOPL */
>>>  #define TIF_SYSCALL_EMU         6       /* syscall emulation active */
>>>  #define TIF_SYSCALL_AUDIT       7       /* syscall auditing active */
>>>  #define TIF_SECCOMP             8       /* secure computing */
>>> @@ -104,6 +105,7 @@ struct thread_info {
>>>  #define _TIF_SIGPENDING         (1 << TIF_SIGPENDING)
>>>  #define _TIF_NEED_RESCHED       (1 << TIF_NEED_RESCHED)
>>>  #define _TIF_SINGLESTEP         (1 << TIF_SINGLESTEP)
>>> +#define _TIF_PV_IOPL            (1 << TIF_PV_IOPL)
>>>  #define _TIF_SYSCALL_EMU        (1 << TIF_SYSCALL_EMU)
>>>  #define _TIF_SYSCALL_AUDIT      (1 << TIF_SYSCALL_AUDIT)
>>>  #define _TIF_SECCOMP            (1 << TIF_SECCOMP)
>>> @@ -141,7 +143,8 @@ struct thread_info {
>>>
>>>  /* flags to check in __switch_to() */
>>>  #define _TIF_WORK_CTXSW \
>>> -       (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
>>> +       (_TIF_IO_BITMAP | _TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
>>> +        _TIF_PV_IOPL)
>>>
>>>  #define _TIF_WORK_CTXSW_PREV    (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
>>>  #define _TIF_WORK_CTXSW_NEXT    (_TIF_WORK_CTXSW)
>>> diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
>>> index 39171b3..8b2d4be 100644
>>> --- a/arch/x86/include/asm/xen/hypervisor.h
>>> +++ b/arch/x86/include/asm/xen/hypervisor.h
>>> @@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num);
>>>  void xen_arch_unregister_cpu(int num);
>>>  #endif
>>>
>>> -extern void xen_set_iopl_mask(unsigned mask);
>>> -
>>>  #endif /* _ASM_X86_XEN_HYPERVISOR_H */
>>> diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
>>> index 9c3cf09..2051e7d 100644
>>> --- a/arch/x86/kernel/ioport.c
>>> +++ b/arch/x86/kernel/ioport.c
>>> @@ -126,6 +126,12 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
>>>         regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
>>>                 (level << X86_EFLAGS_IOPL_BIT);
>>>         t->iopl = level << X86_EFLAGS_IOPL_BIT;
>>> +       if (paravirt_iopl()) {
>>> +               if (level > 0)
>>> +                       set_thread_flag(TIF_PV_IOPL);
>>> +               else
>>> +                       clear_thread_flag(TIF_PV_IOPL);
>>> +       }
>>
>> You could get rid of paravirt_iopl() by adding a "setflag" boolean
>> parameter to set_iopl_mask(). Only the Xen variant would need the above
>> thread flag manipulation.
>
> After the first two patches, the hook is a no-op except in the Xen
> case, so the thread flag change gets skipped except on Xen. Moving
> the code to Xen would work, but it would mean that any other
> hypervisor using that hook would also need to duplicate the thread
> flag changes. Granted, it looks like Xen is doing something unique
> here (running the guest kernel at CPL 1), so that probably won't be an
> issue.

Just create a service function that does the job. Xen should call it,
and so should any other future non-standard user of set_iopl_mask().


Juergen
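For illustration only, a rough sketch of what such a service function could
look like. This is untested and not part of the posted series; the name
pv_iopl_update_flag() and its placement (e.g. next to the iopl() syscall in
arch/x86/kernel/ioport.c) are assumptions.

#include <linux/thread_info.h>        /* set_thread_flag(), clear_thread_flag() */
#include <asm/processor-flags.h>      /* X86_EFLAGS_IOPL */

/*
 * Hypothetical service function: keep the TIF_PV_IOPL bookkeeping in one
 * place.  It only maintains the thread flag; callers remain responsible
 * for the actual privilege change.
 */
void pv_iopl_update_flag(unsigned int iopl_mask)
{
        if (iopl_mask & X86_EFLAGS_IOPL)
                set_thread_flag(TIF_PV_IOPL);
        else
                clear_thread_flag(TIF_PV_IOPL);
}

With something along these lines, xen_set_iopl_mask() (and any other
non-trivial implementation of the set_iopl_mask hook) would call the helper
after issuing its PHYSDEVOP_set_iopl hypercall, so the paravirt_iopl() check
could be dropped from the generic iopl() syscall path.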