Physical addresses on processors supporting 5-level paging can be up to
52 bits wide. For a Xen pv guest running on such a machine those
physical addresses have to be supported in order to be able to use any
memory on the machine, even if the guest itself does not support
5-level paging.

So when reading or writing an MFN from/to a pte, don't use the kernel's
PTE_PFN_MASK but a new XEN_PTE_MFN_MASK allowing full 40-bit-wide MFNs.
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/include/asm/xen/page.h | 11 ++++++++++-
arch/x86/xen/mmu_pv.c | 4 ++--
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 07b6531813c4..bcb8b193c8d1 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -26,6 +26,15 @@ typedef struct xpaddr {
phys_addr_t paddr;
} xpaddr_t;
+#ifdef CONFIG_X86_64
+#define XEN_PHYSICAL_MASK ((1UL << 52) - 1)
+#else
+#define XEN_PHYSICAL_MASK __PHYSICAL_MASK
+#endif
+
+#define XEN_PTE_MFN_MASK ((pteval_t)(((signed long)PAGE_MASK) & \
+ XEN_PHYSICAL_MASK))
+
#define XMADDR(x) ((xmaddr_t) { .maddr = (x) })
#define XPADDR(x) ((xpaddr_t) { .paddr = (x) })
@@ -277,7 +286,7 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
static inline unsigned long pte_mfn(pte_t pte)
{
- return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+ return (pte.pte & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
}
static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 509f560bd0c6..958d36d776d9 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -315,7 +315,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
static pteval_t pte_mfn_to_pfn(pteval_t val)
{
if (val & _PAGE_PRESENT) {
- unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
+ unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
unsigned long pfn = mfn_to_pfn(mfn);
pteval_t flags = val & PTE_FLAGS_MASK;
@@ -1740,7 +1740,7 @@ static unsigned long __init m2p(phys_addr_t maddr)
{
phys_addr_t paddr;
- maddr &= PTE_PFN_MASK;
+ maddr &= XEN_PTE_MFN_MASK;
paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
return paddr;
--
2.12.3
On 09/21/2017 04:01 AM, Juergen Gross wrote:
> Physical addresses on processors supporting 5-level paging can be up to
> 52 bits wide. For a Xen pv guest running on such a machine those
> physical addresses have to be supported in order to be able to use any
> memory on the machine, even if the guest itself does not support
> 5-level paging.
>
> So when reading or writing an MFN from/to a pte, don't use the kernel's
> PTE_PFN_MASK but a new XEN_PTE_MFN_MASK allowing full 40-bit-wide MFNs.
full 52 bits?
>
> Signed-off-by: Juergen Gross <[email protected]>
> ---
> arch/x86/include/asm/xen/page.h | 11 ++++++++++-
> arch/x86/xen/mmu_pv.c | 4 ++--
> 2 files changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index 07b6531813c4..bcb8b193c8d1 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -26,6 +26,15 @@ typedef struct xpaddr {
> phys_addr_t paddr;
> } xpaddr_t;
>
> +#ifdef CONFIG_X86_64
> +#define XEN_PHYSICAL_MASK ((1UL << 52) - 1)
SME is not supported for PV guests, but for consistency (and in case
the SME bit somehow gets set):
#define XEN_PHYSICAL_MASK __sme_clr(((1UL << 52) - 1))
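(Assuming the usual definition in include/linux/mem_encrypt.h, that
expands to roughly

    #define __sme_clr(x)  ((x) & ~sme_me_mask)

with sme_me_mask being zero when SME is inactive, so the extra masking
is a no-op unless the SME bit really does get set.)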
But the real question that I have is whether this patch is sufficient.
We are trying to preserve more bits in the mfn, but then this mfn is
used, say, in pte_pfn_to_mfn() to build a pte. Can we be sure that the
pte won't be stripped of higher bits in native code (again, as an
example, native_make_pte()) because we are compiled with 5LEVEL?
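(For reference, the path I am worried about is roughly:

    pfn-based pteval -> pte_pfn_to_mfn() -> native_make_pte()

i.e. the mfn-carrying value ends up going through code that was built
for the guest's own paging mode.)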
-boris
> +#else
> +#define XEN_PHYSICAL_MASK __PHYSICAL_MASK
> +#endif
> +
> +#define XEN_PTE_MFN_MASK ((pteval_t)(((signed long)PAGE_MASK) & \
> + XEN_PHYSICAL_MASK))
> +
> #define XMADDR(x) ((xmaddr_t) { .maddr = (x) })
> #define XPADDR(x) ((xpaddr_t) { .paddr = (x) })
>
> @@ -277,7 +286,7 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
>
> static inline unsigned long pte_mfn(pte_t pte)
> {
> - return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT;
> + return (pte.pte & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
> }
>
> static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
> diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
> index 509f560bd0c6..958d36d776d9 100644
> --- a/arch/x86/xen/mmu_pv.c
> +++ b/arch/x86/xen/mmu_pv.c
> @@ -315,7 +315,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
> static pteval_t pte_mfn_to_pfn(pteval_t val)
> {
> if (val & _PAGE_PRESENT) {
> - unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
> + unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
> unsigned long pfn = mfn_to_pfn(mfn);
>
> pteval_t flags = val & PTE_FLAGS_MASK;
> @@ -1740,7 +1740,7 @@ static unsigned long __init m2p(phys_addr_t maddr)
> {
> phys_addr_t paddr;
>
> - maddr &= PTE_PFN_MASK;
> + maddr &= XEN_PTE_MFN_MASK;
> paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
>
> return paddr;
>
On 21/09/17 16:14, Boris Ostrovsky wrote:
>
>
> On 09/21/2017 04:01 AM, Juergen Gross wrote:
>> Physical addresses on processors supporting 5-level paging can be up to
>> 52 bits wide. For a Xen pv guest running on such a machine those
>> physical addresses have to be supported in order to be able to use any
>> memory on the machine, even if the guest itself does not support
>> 5-level paging.
>>
>> So when reading or writing an MFN from/to a pte, don't use the kernel's
>> PTE_PFN_MASK but a new XEN_PTE_MFN_MASK allowing full 40-bit-wide MFNs.
>
> full 52 bits?
The MFN mask is only 40 bits. This plus the 12-bit page offset gives
52 bits of machine address width.
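For illustration, the masks work out as follows on x86-64 (my own
arithmetic, not taken from the patch):

    (signed long)PAGE_MASK:  0xfffffffffffff000
    XEN_PHYSICAL_MASK:       0x000fffffffffffff   /* (1UL << 52) - 1 */
    XEN_PTE_MFN_MASK:        0x000ffffffffff000   /* bits 12..51 */

so after shifting right by PAGE_SHIFT an MFN keeps exactly 40 bits.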
>
>>
>> Signed-off-by: Juergen Gross <[email protected]>
>> ---
>> arch/x86/include/asm/xen/page.h | 11 ++++++++++-
>> arch/x86/xen/mmu_pv.c | 4 ++--
>> 2 files changed, 12 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/xen/page.h
>> b/arch/x86/include/asm/xen/page.h
>> index 07b6531813c4..bcb8b193c8d1 100644
>> --- a/arch/x86/include/asm/xen/page.h
>> +++ b/arch/x86/include/asm/xen/page.h
>> @@ -26,6 +26,15 @@ typedef struct xpaddr {
>> phys_addr_t paddr;
>> } xpaddr_t;
>> +#ifdef CONFIG_X86_64
>> +#define XEN_PHYSICAL_MASK ((1UL << 52) - 1)
>
>
> SME is not supported for PV guests, but for consistency (and in case
> the SME bit somehow gets set):
> #define XEN_PHYSICAL_MASK __sme_clr(((1UL << 52) - 1))
Hmm, really? Shouldn't we rather add something like
BUG_ON(sme_active());
somewhere?
> But the real question that I have is whether this patch is sufficient.
> We are trying to preserve more bits in the mfn, but then this mfn is
> used, say, in pte_pfn_to_mfn() to build a pte. Can we be sure that the
> pte won't be stripped of higher bits in native code (again, as an
> example, native_make_pte()) because we are compiled with 5LEVEL?
native_make_pte() just wraps the value in a pte_t. It doesn't modify
the value of the pte at all.
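If I read arch/x86/include/asm/pgtable_types.h correctly (quoting from
memory, so double-check), it is nothing more than:

    static inline pte_t native_make_pte(pteval_t val)
    {
        return (pte_t) { .pte = val };
    }

so no address bits can be lost there.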
Physical address bits are only ever masked away via PTE_PFN_MASK and I
haven't found any place where it is used for an MFN other than those I
touched in this patch.
Juergen
On 09/21/2017 10:41 AM, Juergen Gross wrote:
> On 21/09/17 16:14, Boris Ostrovsky wrote:
>>
>>
>> On 09/21/2017 04:01 AM, Juergen Gross wrote:
>>> Physical addresses on processors supporting 5-level paging can be up to
>>> 52 bits wide. For a Xen pv guest running on such a machine those
>>> physical addresses have to be supported in order to be able to use any
>>> memory on the machine, even if the guest itself does not support
>>> 5-level paging.
>>>
>>> So when reading or writing an MFN from/to a pte, don't use the kernel's
>>> PTE_PFN_MASK but a new XEN_PTE_MFN_MASK allowing full 40-bit-wide MFNs.
>>
>> full 52 bits?
>
> The MFN mask is only 40 bits. This plus the 12-bit page offset gives
> 52 bits of machine address width.
Oh, right --- frames, not addresses.
>
>>
>>>
>>> Signed-off-by: Juergen Gross <[email protected]>
>>> ---
>>> arch/x86/include/asm/xen/page.h | 11 ++++++++++-
>>> arch/x86/xen/mmu_pv.c | 4 ++--
>>> 2 files changed, 12 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/xen/page.h
>>> b/arch/x86/include/asm/xen/page.h
>>> index 07b6531813c4..bcb8b193c8d1 100644
>>> --- a/arch/x86/include/asm/xen/page.h
>>> +++ b/arch/x86/include/asm/xen/page.h
>>> @@ -26,6 +26,15 @@ typedef struct xpaddr {
>>> phys_addr_t paddr;
>>> } xpaddr_t;
>>> +#ifdef CONFIG_X86_64
>>> +#define XEN_PHYSICAL_MASK ((1UL << 52) - 1)
>>
>>
>> SME is not supported for PV guests, but for consistency (and in case
>> the SME bit somehow gets set):
>> #define XEN_PHYSICAL_MASK __sme_clr(((1UL << 52) - 1))
>
> Hmm, really? Shouldn't we rather add something like
>
> BUG_ON(sme_active());
>
> somewhere?
We can do that too.
>
>> But the real question that I have is whether this patch is sufficient.
>> We are trying to preserve more bits in the mfn, but then this mfn is
>> used, say, in pte_pfn_to_mfn() to build a pte. Can we be sure that the
>> pte won't be stripped of higher bits in native code (again, as an
>> example, native_make_pte()) because we are compiled with 5LEVEL?
>
> native_make_pte() just wraps the value in a pte_t. It doesn't modify
> the value of the pte at all.
It's an interface though, and so we don't know what does (or can)
happen under it.
(I also obviously meant "because we are *not* compiled with 5LEVEL")
>
> Physical address bits are only ever masked away via PTE_PFN_MASK and I
> haven't found any place where it is used for an MFN other than those I
> touched in this patch.
OK.
-boris
On 21/09/17 17:00, Boris Ostrovsky wrote:
>
>>
>>>
>>>>
>>>> Signed-off-by: Juergen Gross <[email protected]>
>>>> ---
>>>> arch/x86/include/asm/xen/page.h | 11 ++++++++++-
>>>> arch/x86/xen/mmu_pv.c | 4 ++--
>>>> 2 files changed, 12 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/xen/page.h
>>>> b/arch/x86/include/asm/xen/page.h
>>>> index 07b6531813c4..bcb8b193c8d1 100644
>>>> --- a/arch/x86/include/asm/xen/page.h
>>>> +++ b/arch/x86/include/asm/xen/page.h
>>>> @@ -26,6 +26,15 @@ typedef struct xpaddr {
>>>> phys_addr_t paddr;
>>>> } xpaddr_t;
>>>> +#ifdef CONFIG_X86_64
>>>> +#define XEN_PHYSICAL_MASK ((1UL << 52) - 1)
>>>
>>>
>>> SME is not supported for PV guests, but for consistency (and in case
>>> the SME bit somehow gets set):
>>> #define XEN_PHYSICAL_MASK __sme_clr(((1UL << 52) - 1))
>>
>> Hmm, really? Shouldn't we rather add something like
>>
>> BUG_ON(sme_active());
>>
>> somewhere?
>
> We can do that too.
Please don't do anything to cause Linux to crash if Xen is using SME
itself, but leaving all of the PV guest unencrypted.
~Andrew
On 09/21/2017 12:16 PM, Andrew Cooper wrote:
> On 21/09/17 17:00, Boris Ostrovsky wrote:
>>
>>>
>>>>
>>>>>
>>>>> Signed-off-by: Juergen Gross <[email protected]>
>>>>> ---
>>>>> arch/x86/include/asm/xen/page.h | 11 ++++++++++-
>>>>> arch/x86/xen/mmu_pv.c | 4 ++--
>>>>> 2 files changed, 12 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/include/asm/xen/page.h
>>>>> b/arch/x86/include/asm/xen/page.h
>>>>> index 07b6531813c4..bcb8b193c8d1 100644
>>>>> --- a/arch/x86/include/asm/xen/page.h
>>>>> +++ b/arch/x86/include/asm/xen/page.h
>>>>> @@ -26,6 +26,15 @@ typedef struct xpaddr {
>>>>> phys_addr_t paddr;
>>>>> } xpaddr_t;
>>>>> +#ifdef CONFIG_X86_64
>>>>> +#define XEN_PHYSICAL_MASK ((1UL << 52) - 1)
>>>>
>>>>
>>>> SME is not supported for PV guests, but for consistency (and in case
>>>> the SME bit somehow gets set):
>>>> #define XEN_PHYSICAL_MASK __sme_clr(((1UL << 52) - 1))
>>>
>>> Hmm, really? Shouldn't we rather add something like
>>>
>>> BUG_ON(sme_active());
>>>
>>> somewhere?
>>
>> We can do that too.
>
> Please don't do anything to cause Linux to crash if Xen is using SME
> itself, but leaving all of the PV guest unencrypted.
sme_active() returns true if the *guest* enables it.
Also, if the guest's memory is unencrypted, doesn't this mean that mfns
that it sees (or, rather, ptes) will not have the SME bit set?
-boris