2022-08-08 04:21:45

by Baolu Lu

[permalink] [raw]
Subject: [PATCH 1/1] iommu/vt-d: Fix kdump kernels boot failure with scalable mode

The translation table copying code for kdump kernels is currently based
on the extended root/context entry formats of ECS mode defined in older
VT-d v2.5, and doesn't handle the scalable mode formats. This causes
the kexec capture kernel boot failure with DMAR faults if the IOMMU was
enabled in scalable mode by the previous kernel.

The ECS mode has already been deprecated by the VT-d spec since v3.0 and
Intel IOMMU driver doesn't support this mode as there's no real hardware
implementation. Hence this converts ECS checking in copying table code
into scalable mode.

The existing copying code consumes a bit in the context entry as a mark
of copied entry. This marker needs to work for the old format as well as
for extended context entries. It's hard to find such a bit for both
legacy and scalable mode context entries. This replaces it with a per-
IOMMU bitmap.

Fixes: 7373a8cc38197 ("iommu/vt-d: Setup context and enable RID2PASID support")
Cc: [email protected]
Reported-by: Jerry Snitselaar <[email protected]>
Tested-by: Wen Jin <[email protected]>
Signed-off-by: Lu Baolu <[email protected]>
---
drivers/iommu/intel/iommu.h | 17 ++++++--
drivers/iommu/intel/debugfs.c | 3 +-
drivers/iommu/intel/iommu.c | 76 +++++++++--------------------------
3 files changed, 35 insertions(+), 61 deletions(-)

diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index fae45bbb0c7f..e9b851c42575 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -197,7 +197,6 @@
#define ecap_dis(e) (((e) >> 27) & 0x1)
#define ecap_nest(e) (((e) >> 26) & 0x1)
#define ecap_mts(e) (((e) >> 25) & 0x1)
-#define ecap_ecs(e) (((e) >> 24) & 0x1)
#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
#define ecap_coherent(e) ((e) & 0x1)
@@ -265,7 +264,6 @@
#define DMA_GSTS_CFIS (((u32)1) << 23)

/* DMA_RTADDR_REG */
-#define DMA_RTADDR_RTT (((u64)1) << 11)
#define DMA_RTADDR_SMT (((u64)1) << 10)

/* CCMD_REG */
@@ -579,6 +577,7 @@ struct intel_iommu {

#ifdef CONFIG_INTEL_IOMMU
unsigned long *domain_ids; /* bitmap of domains */
+ unsigned long *copied_tables; /* bitmap of copied tables */
spinlock_t lock; /* protect context, domain ids */
struct root_entry *root_entry; /* virtual address */

@@ -701,6 +700,19 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte)
(struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
}

+static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
+{
+ if (!iommu->copied_tables)
+ return false;
+
+ return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
+}
+
+static inline bool context_present(struct context_entry *context)
+{
+ return (context->lo & 1);
+}
+
extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);

extern int dmar_enable_qi(struct intel_iommu *iommu);
@@ -784,7 +796,6 @@ static inline void intel_iommu_debugfs_init(void) {}
#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */

extern const struct attribute_group *intel_iommu_groups[];
-bool context_present(struct context_entry *context);
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
u8 devfn, int alloc);

diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index 1f925285104e..f4fd249daad9 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -241,7 +241,8 @@ static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus)
if (!context)
return;

- if (!context_present(context))
+ if (!context_present(context) ||
+ context_copied(iommu, bus, devfn))
continue;

tbl_wlk.bus = bus;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7cca030a508e..889ad2c9a7b9 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -163,38 +163,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
return re->hi & VTD_PAGE_MASK;
}

-static inline void context_clear_pasid_enable(struct context_entry *context)
-{
- context->lo &= ~(1ULL << 11);
-}
-
-static inline bool context_pasid_enabled(struct context_entry *context)
-{
- return !!(context->lo & (1ULL << 11));
-}
-
-static inline void context_set_copied(struct context_entry *context)
-{
- context->hi |= (1ull << 3);
-}
-
-static inline bool context_copied(struct context_entry *context)
-{
- return !!(context->hi & (1ULL << 3));
-}
-
-static inline bool __context_present(struct context_entry *context)
-{
- return (context->lo & 1);
-}
-
-bool context_present(struct context_entry *context)
-{
- return context_pasid_enabled(context) ?
- __context_present(context) :
- __context_present(context) && !context_copied(context);
-}
-
static inline void context_set_present(struct context_entry *context)
{
context->lo |= 1;
@@ -764,7 +732,8 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
spin_lock(&iommu->lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
if (context)
- ret = context_present(context);
+ ret = context_present(context) &&
+ !context_copied(iommu, bus, devfn);
spin_unlock(&iommu->lock);
return ret;
}
@@ -1688,6 +1657,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
iommu->domain_ids = NULL;
}

+ if (iommu->copied_tables) {
+ bitmap_free(iommu->copied_tables);
+ iommu->copied_tables = NULL;
+ }
+
/* free context mapping */
free_context_table(iommu);

@@ -1913,7 +1887,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
goto out_unlock;

ret = 0;
- if (context_present(context))
+ if (context_present(context) && !context_copied(iommu, bus, devfn))
goto out_unlock;

/*
@@ -1925,7 +1899,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
* in-flight DMA will exist, and we don't need to worry anymore
* hereafter.
*/
- if (context_copied(context)) {
+ if (context_copied(iommu, bus, devfn)) {
u16 did_old = context_domain_id(context);

if (did_old < cap_ndoms(iommu->cap)) {
@@ -1936,6 +1910,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
DMA_TLB_DSI_FLUSH);
}
+
+ clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
}

context_clear_entry(context);
@@ -2684,32 +2660,14 @@ static int copy_context_table(struct intel_iommu *iommu,
/* Now copy the context entry */
memcpy(&ce, old_ce + idx, sizeof(ce));

- if (!__context_present(&ce))
+ if (!context_present(&ce))
continue;

did = context_domain_id(&ce);
if (did >= 0 && did < cap_ndoms(iommu->cap))
set_bit(did, iommu->domain_ids);

- /*
- * We need a marker for copied context entries. This
- * marker needs to work for the old format as well as
- * for extended context entries.
- *
- * Bit 67 of the context entry is used. In the old
- * format this bit is available to software, in the
- * extended format it is the PGE bit, but PGE is ignored
- * by HW if PASIDs are disabled (and thus still
- * available).
- *
- * So disable PASIDs first and then mark the entry
- * copied. This means that we don't copy PASID
- * translations from the old kernel, but this is fine as
- * faults there are not fatal.
- */
- context_clear_pasid_enable(&ce);
- context_set_copied(&ce);
-
+ set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
new_ce[idx] = ce;
}

@@ -2735,8 +2693,8 @@ static int copy_translation_tables(struct intel_iommu *iommu)
bool new_ext, ext;

rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
- ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
- new_ext = !!ecap_ecs(iommu->ecap);
+ ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
+ new_ext = !!ecap_smts(iommu->ecap);

/*
* The RTT bit can only be changed when translation is disabled,
@@ -2747,6 +2705,10 @@ static int copy_translation_tables(struct intel_iommu *iommu)
if (new_ext != ext)
return -EINVAL;

+ iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
+ if (!iommu->copied_tables)
+ return -ENOMEM;
+
old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
if (!old_rt_phys)
return -EINVAL;
--
2.25.1


2022-08-08 09:17:29

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/1] iommu/vt-d: Fix kdump kernels boot failure with scalable mode

Hi Lu,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on next-20220808]
[cannot apply to joro-iommu/next v5.19]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Lu-Baolu/iommu-vt-d-Fix-kdump-kernels-boot-failure-with-scalable-mode/20220808-115156
base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 4e23eeebb2e57f5a28b36221aa776b5a1122dde5
config: x86_64-randconfig-a011-20220808 (https://download.01.org/0day-ci/archive/20220808/[email protected]/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 5f1c7e2cc5a3c07cbc2412e851a7283c1841f520)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/335018299049ac5dc13ff12d320b5952bed7e8f8
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Lu-Baolu/iommu-vt-d-Fix-kdump-kernels-boot-failure-with-scalable-mode/20220808-115156
git checkout 335018299049ac5dc13ff12d320b5952bed7e8f8
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

In file included from drivers/iommu/intel/dmar.c:33:
>> drivers/iommu/intel/iommu.h:705:14: error: no member named 'copied_tables' in 'struct intel_iommu'
if (!iommu->copied_tables)
~~~~~ ^
drivers/iommu/intel/iommu.h:708:51: error: no member named 'copied_tables' in 'struct intel_iommu'
return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
~~~~~ ^
include/linux/bitops.h:61:50: note: expanded from macro 'test_bit'
#define test_bit(nr, addr) bitop(_test_bit, nr, addr)
^~~~
include/linux/bitops.h:50:37: note: expanded from macro 'bitop'
__builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \
^~~~
In file included from drivers/iommu/intel/dmar.c:33:
drivers/iommu/intel/iommu.h:708:51: error: no member named 'copied_tables' in 'struct intel_iommu'
return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
~~~~~ ^
include/linux/bitops.h:61:50: note: expanded from macro 'test_bit'
#define test_bit(nr, addr) bitop(_test_bit, nr, addr)
^~~~
include/linux/bitops.h:51:16: note: expanded from macro 'bitop'
(uintptr_t)(addr) != (uintptr_t)NULL && \
^~~~
In file included from drivers/iommu/intel/dmar.c:33:
drivers/iommu/intel/iommu.h:708:51: error: no member named 'copied_tables' in 'struct intel_iommu'
return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
~~~~~ ^
include/linux/bitops.h:61:50: note: expanded from macro 'test_bit'
#define test_bit(nr, addr) bitop(_test_bit, nr, addr)
^~~~
include/linux/bitops.h:52:50: note: expanded from macro 'bitop'
__builtin_constant_p(*(const unsigned long *)(addr))) ? \
^~~~
In file included from drivers/iommu/intel/dmar.c:33:
drivers/iommu/intel/iommu.h:708:51: error: no member named 'copied_tables' in 'struct intel_iommu'
return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
~~~~~ ^
include/linux/bitops.h:61:50: note: expanded from macro 'test_bit'
#define test_bit(nr, addr) bitop(_test_bit, nr, addr)
^~~~
include/linux/bitops.h:53:17: note: expanded from macro 'bitop'
const##op(nr, addr) : op(nr, addr))
^~~~
In file included from drivers/iommu/intel/dmar.c:33:
drivers/iommu/intel/iommu.h:708:51: error: no member named 'copied_tables' in 'struct intel_iommu'
return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
~~~~~ ^
include/linux/bitops.h:61:50: note: expanded from macro 'test_bit'
#define test_bit(nr, addr) bitop(_test_bit, nr, addr)
^~~~
include/linux/bitops.h:53:32: note: expanded from macro 'bitop'
const##op(nr, addr) : op(nr, addr))
^~~~
6 errors generated.


vim +705 drivers/iommu/intel/iommu.h

702
703 static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
704 {
> 705 if (!iommu->copied_tables)
706 return false;
707
708 return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
709 }
710

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-08-08 16:40:23

by Jerry Snitselaar

[permalink] [raw]
Subject: Re: [PATCH 1/1] iommu/vt-d: Fix kdump kernels boot failure with scalable mode

On Mon, Aug 08, 2022 at 11:46:12AM +0800, Lu Baolu wrote:
> The translation table copying code for kdump kernels is currently based
> on the extended root/context entry formats of ECS mode defined in older
> VT-d v2.5, and doesn't handle the scalable mode formats. This causes
> the kexec capture kernel boot failure with DMAR faults if the IOMMU was
> enabled in scalable mode by the previous kernel.
>
> The ECS mode has already been deprecated by the VT-d spec since v3.0 and
> Intel IOMMU driver doesn't support this mode as there's no real hardware
> implementation. Hence this converts ECS checking in copying table code
> into scalable mode.
>
> The existing copying code consumes a bit in the context entry as a mark
> of copied entry. This marker needs to work for the old format as well as
> for extended context entries. It's hard to find such a bit for both
> legacy and scalable mode context entries. This replaces it with a per-
> IOMMU bitmap.
>
> Fixes: 7373a8cc38197 ("iommu/vt-d: Setup context and enable RID2PASID support")
> Cc: [email protected]
> Reported-by: Jerry Snitselaar <[email protected]>
> Tested-by: Wen Jin <[email protected]>
> Signed-off-by: Lu Baolu <[email protected]>
> ---

I did a quick test last night, and it was able to harvest the vmcore,
and boot back up. Before you mentioned part of the issue being that it
couldn't get to the PGTT field in the pasid table entry. Was that not
the case, or is it looking at the old kernel pasid dir entries and
table entries through the pasid dir pointer in the copied context
entry?

Regards,
Jerry

> drivers/iommu/intel/iommu.h | 17 ++++++--
> drivers/iommu/intel/debugfs.c | 3 +-
> drivers/iommu/intel/iommu.c | 76 +++++++++--------------------------
> 3 files changed, 35 insertions(+), 61 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
> index fae45bbb0c7f..e9b851c42575 100644
> --- a/drivers/iommu/intel/iommu.h
> +++ b/drivers/iommu/intel/iommu.h
> @@ -197,7 +197,6 @@
> #define ecap_dis(e) (((e) >> 27) & 0x1)
> #define ecap_nest(e) (((e) >> 26) & 0x1)
> #define ecap_mts(e) (((e) >> 25) & 0x1)
> -#define ecap_ecs(e) (((e) >> 24) & 0x1)
> #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
> #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
> #define ecap_coherent(e) ((e) & 0x1)
> @@ -265,7 +264,6 @@
> #define DMA_GSTS_CFIS (((u32)1) << 23)
>
> /* DMA_RTADDR_REG */
> -#define DMA_RTADDR_RTT (((u64)1) << 11)
> #define DMA_RTADDR_SMT (((u64)1) << 10)
>
> /* CCMD_REG */
> @@ -579,6 +577,7 @@ struct intel_iommu {
>
> #ifdef CONFIG_INTEL_IOMMU
> unsigned long *domain_ids; /* bitmap of domains */
> + unsigned long *copied_tables; /* bitmap of copied tables */
> spinlock_t lock; /* protect context, domain ids */
> struct root_entry *root_entry; /* virtual address */
>
> @@ -701,6 +700,19 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte)
> (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
> }
>
> +static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn)
> +{
> + if (!iommu->copied_tables)
> + return false;
> +
> + return test_bit(((long)bus << 8) | devfn, iommu->copied_tables);
> +}
> +
> +static inline bool context_present(struct context_entry *context)
> +{
> + return (context->lo & 1);
> +}
> +
> extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
>
> extern int dmar_enable_qi(struct intel_iommu *iommu);
> @@ -784,7 +796,6 @@ static inline void intel_iommu_debugfs_init(void) {}
> #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
>
> extern const struct attribute_group *intel_iommu_groups[];
> -bool context_present(struct context_entry *context);
> struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
> u8 devfn, int alloc);
>
> diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
> index 1f925285104e..f4fd249daad9 100644
> --- a/drivers/iommu/intel/debugfs.c
> +++ b/drivers/iommu/intel/debugfs.c
> @@ -241,7 +241,8 @@ static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus)
> if (!context)
> return;
>
> - if (!context_present(context))
> + if (!context_present(context) ||
> + context_copied(iommu, bus, devfn))
> continue;
>
> tbl_wlk.bus = bus;
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 7cca030a508e..889ad2c9a7b9 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -163,38 +163,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
> return re->hi & VTD_PAGE_MASK;
> }
>
> -static inline void context_clear_pasid_enable(struct context_entry *context)
> -{
> - context->lo &= ~(1ULL << 11);
> -}
> -
> -static inline bool context_pasid_enabled(struct context_entry *context)
> -{
> - return !!(context->lo & (1ULL << 11));
> -}
> -
> -static inline void context_set_copied(struct context_entry *context)
> -{
> - context->hi |= (1ull << 3);
> -}
> -
> -static inline bool context_copied(struct context_entry *context)
> -{
> - return !!(context->hi & (1ULL << 3));
> -}
> -
> -static inline bool __context_present(struct context_entry *context)
> -{
> - return (context->lo & 1);
> -}
> -
> -bool context_present(struct context_entry *context)
> -{
> - return context_pasid_enabled(context) ?
> - __context_present(context) :
> - __context_present(context) && !context_copied(context);
> -}
> -
> static inline void context_set_present(struct context_entry *context)
> {
> context->lo |= 1;
> @@ -764,7 +732,8 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
> spin_lock(&iommu->lock);
> context = iommu_context_addr(iommu, bus, devfn, 0);
> if (context)
> - ret = context_present(context);
> + ret = context_present(context) &&
> + !context_copied(iommu, bus, devfn);
> spin_unlock(&iommu->lock);
> return ret;
> }
> @@ -1688,6 +1657,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
> iommu->domain_ids = NULL;
> }
>
> + if (iommu->copied_tables) {
> + bitmap_free(iommu->copied_tables);
> + iommu->copied_tables = NULL;
> + }
> +
> /* free context mapping */
> free_context_table(iommu);
>
> @@ -1913,7 +1887,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
> goto out_unlock;
>
> ret = 0;
> - if (context_present(context))
> + if (context_present(context) && !context_copied(iommu, bus, devfn))
> goto out_unlock;
>
> /*
> @@ -1925,7 +1899,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
> * in-flight DMA will exist, and we don't need to worry anymore
> * hereafter.
> */
> - if (context_copied(context)) {
> + if (context_copied(iommu, bus, devfn)) {
> u16 did_old = context_domain_id(context);
>
> if (did_old < cap_ndoms(iommu->cap)) {
> @@ -1936,6 +1910,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
> iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
> DMA_TLB_DSI_FLUSH);
> }
> +
> + clear_bit(((long)bus << 8) | devfn, iommu->copied_tables);
> }
>
> context_clear_entry(context);
> @@ -2684,32 +2660,14 @@ static int copy_context_table(struct intel_iommu *iommu,
> /* Now copy the context entry */
> memcpy(&ce, old_ce + idx, sizeof(ce));
>
> - if (!__context_present(&ce))
> + if (!context_present(&ce))
> continue;
>
> did = context_domain_id(&ce);
> if (did >= 0 && did < cap_ndoms(iommu->cap))
> set_bit(did, iommu->domain_ids);
>
> - /*
> - * We need a marker for copied context entries. This
> - * marker needs to work for the old format as well as
> - * for extended context entries.
> - *
> - * Bit 67 of the context entry is used. In the old
> - * format this bit is available to software, in the
> - * extended format it is the PGE bit, but PGE is ignored
> - * by HW if PASIDs are disabled (and thus still
> - * available).
> - *
> - * So disable PASIDs first and then mark the entry
> - * copied. This means that we don't copy PASID
> - * translations from the old kernel, but this is fine as
> - * faults there are not fatal.
> - */
> - context_clear_pasid_enable(&ce);
> - context_set_copied(&ce);
> -
> + set_bit(((long)bus << 8) | devfn, iommu->copied_tables);
> new_ce[idx] = ce;
> }
>
> @@ -2735,8 +2693,8 @@ static int copy_translation_tables(struct intel_iommu *iommu)
> bool new_ext, ext;
>
> rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
> - ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
> - new_ext = !!ecap_ecs(iommu->ecap);
> + ext = !!(rtaddr_reg & DMA_RTADDR_SMT);
> + new_ext = !!ecap_smts(iommu->ecap);
>
> /*
> * The RTT bit can only be changed when translation is disabled,
> @@ -2747,6 +2705,10 @@ static int copy_translation_tables(struct intel_iommu *iommu)
> if (new_ext != ext)
> return -EINVAL;
>
> + iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL);
> + if (!iommu->copied_tables)
> + return -ENOMEM;
> +
> old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
> if (!old_rt_phys)
> return -EINVAL;
> --
> 2.25.1
>

2022-08-12 04:09:23

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH 1/1] iommu/vt-d: Fix kdump kernels boot failure with scalable mode

On 2022/8/8 17:04, kernel test robot wrote:
> Hi Lu,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on linus/master]
> [also build test ERROR on next-20220808]
> [cannot apply to joro-iommu/next v5.19]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url:https://github.com/intel-lab-lkp/linux/commits/Lu-Baolu/iommu-vt-d-Fix-kdump-kernels-boot-failure-with-scalable-mode/20220808-115156
> base:https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 4e23eeebb2e57f5a28b36221aa776b5a1122dde5
> config: x86_64-randconfig-a011-20220808 (https://download.01.org/0day-ci/archive/20220808/[email protected]/config)
> compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 5f1c7e2cc5a3c07cbc2412e851a7283c1841f520)
> reproduce (this is a W=1 build):
> wgethttps://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> #https://github.com/intel-lab-lkp/linux/commit/335018299049ac5dc13ff12d320b5952bed7e8f8
> git remote add linux-reviewhttps://github.com/intel-lab-lkp/linux
> git fetch --no-tags linux-review Lu-Baolu/iommu-vt-d-Fix-kdump-kernels-boot-failure-with-scalable-mode/20220808-115156
> git checkout 335018299049ac5dc13ff12d320b5952bed7e8f8
> # save the config file
> mkdir build_dir && cp config build_dir/.config
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag where applicable
> Reported-by: kernel test robot<[email protected]>
>
> All errors (new ones prefixed by >>):
>
> In file included from drivers/iommu/intel/dmar.c:33:
>>> drivers/iommu/intel/iommu.h:705:14: error: no member named 'copied_tables' in 'struct intel_iommu'

Thanks for reporting. This happens when INTEL_IOMMU=n && DMAR=y. I will
fix it in the next version.

Best regards,
baolu

2022-08-12 04:12:08

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH 1/1] iommu/vt-d: Fix kdump kernels boot failure with scalable mode

Hi Jerry,

On 2022/8/9 00:21, Jerry Snitselaar wrote:
> On Mon, Aug 08, 2022 at 11:46:12AM +0800, Lu Baolu wrote:
>> The translation table copying code for kdump kernels is currently based
>> on the extended root/context entry formats of ECS mode defined in older
>> VT-d v2.5, and doesn't handle the scalable mode formats. This causes
>> the kexec capture kernel boot failure with DMAR faults if the IOMMU was
>> enabled in scalable mode by the previous kernel.
>>
>> The ECS mode has already been deprecated by the VT-d spec since v3.0 and
>> Intel IOMMU driver doesn't support this mode as there's no real hardware
>> implementation. Hence this converts ECS checking in copying table code
>> into scalable mode.
>>
>> The existing copying code consumes a bit in the context entry as a mark
>> of copied entry. This marker needs to work for the old format as well as
>> for extended context entries. It's hard to find such a bit for both
>> legacy and scalable mode context entries. This replaces it with a per-
>> IOMMU bitmap.
>>
>> Fixes: 7373a8cc38197 ("iommu/vt-d: Setup context and enable RID2PASID support")
>> Cc:[email protected]
>> Reported-by: Jerry Snitselaar<[email protected]>
>> Tested-by: Wen Jin<[email protected]>
>> Signed-off-by: Lu Baolu<[email protected]>
>> ---
> I did a quick test last night, and it was able to harvest the vmcore,
> and boot back up. Before you mentioned part of the issue being that it
> couldn't get to the PGTT field in the pasid table entry. Was that not
> the case,

It is the case from IOMMU hardware point of view.

> or is it looking at the old kernel pasid dir entries and
> table entries through the pasid dir pointer in the copied context
> entry?

Yes. It reuses the pasid table in old kernel and replaces it until the
new device driver takes over and starts the first DMA operation.

Best regards,
baolu