LinuxLists.cc - [PATCH v2 07/23] mips: update_mmu_cache() can replace __update

2023-06-08 19:24:13

Subject: [PATCH v2 07/23] mips: update_mmu_cache() can replace __update_tlb()

Don't make update_mmu_cache() a wrapper around __update_tlb(): call it
directly, and use the ptep (or pmdp) provided by the caller, instead of
re-calling pte_offset_map() - which would raise a question of whether a
pte_unmap() is needed to balance it.

Check whether the "ptep" provided by the caller is actually the pmdp,
instead of testing pmd_huge(): or test pmd_huge() too and warn if it
disagrees? This is "hazardous" territory: needs review and testing.

Signed-off-by: Hugh Dickins <[email protected]>
---
arch/mips/include/asm/pgtable.h | 15 +++------------
arch/mips/mm/tlb-r3k.c | 5 +++--
arch/mips/mm/tlb-r4k.c | 9 +++------
3 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 574fa14ac8b2..9175dfab08d5 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -565,15 +565,8 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
}
#endif

-extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
- pte_t pte);
-
-static inline void update_mmu_cache(struct vm_area_struct *vma,
- unsigned long address, pte_t *ptep)
-{
- pte_t pte = *ptep;
- __update_tlb(vma, address, pte);
-}
+extern void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep);

#define __HAVE_ARCH_UPDATE_MMU_TLB
#define update_mmu_tlb update_mmu_cache
@@ -581,9 +574,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
- pte_t pte = *(pte_t *)pmdp;
-
- __update_tlb(vma, address, pte);
+ update_mmu_cache(vma, address, (pte_t *)pmdp);
}

/*
diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
index 53dfa2b9316b..e5722cd8dd6d 100644
--- a/arch/mips/mm/tlb-r3k.c
+++ b/arch/mips/mm/tlb-r3k.c
@@ -176,7 +176,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
}
}

-void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
+void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
unsigned long flags;
@@ -203,7 +204,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
BARRIER;
tlb_probe();
idx = read_c0_index();
- write_c0_entrylo0(pte_val(pte));
+ write_c0_entrylo0(pte_val(*ptep));
write_c0_entryhi(address | pid);
if (idx < 0) { /* BARRIER */
tlb_write_random();
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 1b939abbe4ca..c96725d17cab 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -290,14 +290,14 @@ void local_flush_tlb_one(unsigned long page)
* updates the TLB with the new pte(s), and another which also checks
* for the R4k "end of page" hardware bug and does the needy.
*/
-void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
+void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
{
unsigned long flags;
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
- pte_t *ptep;
int idx, pid;

/*
@@ -326,10 +326,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
idx = read_c0_index();
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
/* this could be a huge page */
- if (pmd_huge(*pmdp)) {
+ if (ptep == (pte_t *)pmdp) {
unsigned long lo;
write_c0_pagemask(PM_HUGE_MASK);
- ptep = (pte_t *)pmdp;
lo = pte_to_entrylo(pte_val(*ptep));
write_c0_entrylo0(lo);
write_c0_entrylo1(lo + (HPAGE_SIZE >> 7));
@@ -344,8 +343,6 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
} else
#endif
{
- ptep = pte_offset_map(pmdp, address);
-
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
#ifdef CONFIG_XPA
write_c0_entrylo0(pte_to_entrylo(ptep->pte_high));
--
2.35.3

2023-06-09 08:23:48

by Hugh Dickins

[permalink] [raw]

Subject: [PATCH v2 07/23 fix] mips: update_mmu_cache() can replace __update_tlb(): fix

I expect this to fix the
arch/mips/mm/tlb-r4k.c:300:16: warning: variable 'pmdp' set but not used
reported by the kernel test robot; but I am uncomfortable rearranging
lines in this tlb_probe_hazard() area, and would be glad for review and
testing by someone familiar with mips - thanks in advance!

Reported-by: kernel test robot <[email protected]>
Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
Signed-off-by: Hugh Dickins <[email protected]>
---
arch/mips/mm/tlb-r4k.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index c96725d17cab..80fc90d8d2f1 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -293,11 +293,13 @@ void local_flush_tlb_one(unsigned long page)
void update_mmu_cache(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- unsigned long flags;
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
pgd_t *pgdp;
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
+#endif
+ unsigned long flags;
int idx, pid;

/*
@@ -316,15 +318,15 @@ void update_mmu_cache(struct vm_area_struct *vma,
pid = read_c0_entryhi() & cpu_asid_mask(&current_cpu_data);
write_c0_entryhi(address | pid);
}
- pgdp = pgd_offset(vma->vm_mm, address);
mtc0_tlbw_hazard();
tlb_probe();
tlb_probe_hazard();
+ idx = read_c0_index();
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+ pgdp = pgd_offset(vma->vm_mm, address);
p4dp = p4d_offset(pgdp, address);
pudp = pud_offset(p4dp, address);
pmdp = pmd_offset(pudp, address);
- idx = read_c0_index();
-#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
/* this could be a huge page */
if (ptep == (pte_t *)pmdp) {
unsigned long lo;
--
2.35.3

2023-06-14 23:38:02

by Nathan Chancellor

[permalink] [raw]

Subject: Re: [PATCH v2 07/23] mips: update_mmu_cache() can replace __update_tlb()

Hi Hugh,

On Thu, Jun 08, 2023 at 12:17:24PM -0700, Hugh Dickins wrote:
> Don't make update_mmu_cache() a wrapper around __update_tlb(): call it
> directly, and use the ptep (or pmdp) provided by the caller, instead of
> re-calling pte_offset_map() - which would raise a question of whether a
> pte_unmap() is needed to balance it.
>
> Check whether the "ptep" provided by the caller is actually the pmdp,
> instead of testing pmd_huge(): or test pmd_huge() too and warn if it
> disagrees? This is "hazardous" territory: needs review and testing.
>
> Signed-off-by: Hugh Dickins <[email protected]>
> ---
> arch/mips/include/asm/pgtable.h | 15 +++------------
> arch/mips/mm/tlb-r3k.c | 5 +++--
> arch/mips/mm/tlb-r4k.c | 9 +++------
> 3 files changed, 9 insertions(+), 20 deletions(-)
>
> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
> index 574fa14ac8b2..9175dfab08d5 100644
> --- a/arch/mips/include/asm/pgtable.h
> +++ b/arch/mips/include/asm/pgtable.h
> @@ -565,15 +565,8 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
> }
> #endif
>
> -extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
> - pte_t pte);
> -
> -static inline void update_mmu_cache(struct vm_area_struct *vma,
> - unsigned long address, pte_t *ptep)
> -{
> - pte_t pte = *ptep;
> - __update_tlb(vma, address, pte);
> -}
> +extern void update_mmu_cache(struct vm_area_struct *vma,
> + unsigned long address, pte_t *ptep);
>
> #define __HAVE_ARCH_UPDATE_MMU_TLB
> #define update_mmu_tlb update_mmu_cache
> @@ -581,9 +574,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
> static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
> unsigned long address, pmd_t *pmdp)
> {
> - pte_t pte = *(pte_t *)pmdp;
> -
> - __update_tlb(vma, address, pte);
> + update_mmu_cache(vma, address, (pte_t *)pmdp);
> }
>
> /*
> diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
> index 53dfa2b9316b..e5722cd8dd6d 100644
> --- a/arch/mips/mm/tlb-r3k.c
> +++ b/arch/mips/mm/tlb-r3k.c
> @@ -176,7 +176,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
> }
> }
>
> -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
> +void update_mmu_cache(struct vm_area_struct *vma,
> + unsigned long address, pte_t *ptep)
> {
> unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
> unsigned long flags;
> @@ -203,7 +204,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
> BARRIER;
> tlb_probe();
> idx = read_c0_index();
> - write_c0_entrylo0(pte_val(pte));
> + write_c0_entrylo0(pte_val(*ptep));
> write_c0_entryhi(address | pid);
> if (idx < 0) { /* BARRIER */
> tlb_write_random();
> diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
> index 1b939abbe4ca..c96725d17cab 100644
> --- a/arch/mips/mm/tlb-r4k.c
> +++ b/arch/mips/mm/tlb-r4k.c
> @@ -290,14 +290,14 @@ void local_flush_tlb_one(unsigned long page)
> * updates the TLB with the new pte(s), and another which also checks
> * for the R4k "end of page" hardware bug and does the needy.
> */
> -void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
> +void update_mmu_cache(struct vm_area_struct *vma,
> + unsigned long address, pte_t *ptep)
> {
> unsigned long flags;
> pgd_t *pgdp;
> p4d_t *p4dp;
> pud_t *pudp;
> pmd_t *pmdp;
> - pte_t *ptep;
> int idx, pid;
>
> /*
> @@ -326,10 +326,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
> idx = read_c0_index();
> #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
> /* this could be a huge page */
> - if (pmd_huge(*pmdp)) {
> + if (ptep == (pte_t *)pmdp) {
> unsigned long lo;
> write_c0_pagemask(PM_HUGE_MASK);
> - ptep = (pte_t *)pmdp;
> lo = pte_to_entrylo(pte_val(*ptep));
> write_c0_entrylo0(lo);
> write_c0_entrylo1(lo + (HPAGE_SIZE >> 7));
> @@ -344,8 +343,6 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
> } else
> #endif
> {
> - ptep = pte_offset_map(pmdp, address);
> -
> #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
> #ifdef CONFIG_XPA
> write_c0_entrylo0(pte_to_entrylo(ptep->pte_high));
> --
> 2.35.3
>

I just bisected a crash while powering down a MIPS machine in QEMU to
this change as commit 8044511d3893 ("mips: update_mmu_cache() can
replace __update_tlb()") in linux-next. Unfortunately, I can still
reproduce it with the existing fix you have for this change on the
mailing list, which is present in next-20230614.

I can reproduce it with the GCC 13.1.0 on kernel.org [1].

$ make -skj"$(nproc)" ARCH=mips CROSS_COMPILE=mips-linux- mrproper malta_defconfig vmlinux

$ qemu-system-mipsel \
-display none \
-nodefaults \
-cpu 24Kf \
-machine malta \
-kernel vmlinux \
-initrd rootfs.cpio \
-m 512m \
-serial mon:stdio
...
Linux version 6.4.0-rc6-next-20230614 ([email protected]) (mips-linux-gcc (GCC) 13.1.0, GNU ld (GNU Binutils) 2.40) #1 SMP Wed Jun 14 16:13:02 MST 2023
...
Run /init as init process
process '/bin/busybox' started with executable stack
do_page_fault(): sending SIGSEGV to init for invalid read access from 0000003c
epc = 77b893dc in ld-uClibc-1.0.39.so[77b84000+8000]
ra = 77b8930c in ld-uClibc-1.0.39.so[77b84000+8000]
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

The rootfs is available at [2] if it is needed. I am more than happy to
provide additional information or test patches if necessary.

[1]: https://mirrors.edge.kernel.org/pub/tools/crosstool/
[2]: https://github.com/ClangBuiltLinux/boot-utils/releases/download/20230609-194440/mipsel-rootfs.cpio.zst

Cheers,
Nathan

2023-06-15 00:45:04

by Hugh Dickins

[permalink] [raw]

Subject: Re: [PATCH v2 07/23] mips: update_mmu_cache() can replace __update_tlb()

On Wed, 14 Jun 2023, Nathan Chancellor wrote:

> Hi Hugh,
>
> On Thu, Jun 08, 2023 at 12:17:24PM -0700, Hugh Dickins wrote:
> > Don't make update_mmu_cache() a wrapper around __update_tlb(): call it
> > directly, and use the ptep (or pmdp) provided by the caller, instead of
> > re-calling pte_offset_map() - which would raise a question of whether a
> > pte_unmap() is needed to balance it.
> >
> > Check whether the "ptep" provided by the caller is actually the pmdp,
> > instead of testing pmd_huge(): or test pmd_huge() too and warn if it
> > disagrees? This is "hazardous" territory: needs review and testing.
> >
> > Signed-off-by: Hugh Dickins <[email protected]>
> > ---
> > arch/mips/include/asm/pgtable.h | 15 +++------------
> > arch/mips/mm/tlb-r3k.c | 5 +++--
> > arch/mips/mm/tlb-r4k.c | 9 +++------
> > 3 files changed, 9 insertions(+), 20 deletions(-)
> >
> > diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
> > index 574fa14ac8b2..9175dfab08d5 100644
> > --- a/arch/mips/include/asm/pgtable.h
> > +++ b/arch/mips/include/asm/pgtable.h
> > @@ -565,15 +565,8 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
> > }
> > #endif
> >
> > -extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
> > - pte_t pte);
> > -
> > -static inline void update_mmu_cache(struct vm_area_struct *vma,
> > - unsigned long address, pte_t *ptep)
> > -{
> > - pte_t pte = *ptep;
> > - __update_tlb(vma, address, pte);
> > -}
> > +extern void update_mmu_cache(struct vm_area_struct *vma,
> > + unsigned long address, pte_t *ptep);
> >
> > #define __HAVE_ARCH_UPDATE_MMU_TLB
> > #define update_mmu_tlb update_mmu_cache
> > @@ -581,9 +574,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
> > static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
> > unsigned long address, pmd_t *pmdp)
> > {
> > - pte_t pte = *(pte_t *)pmdp;
> > -
> > - __update_tlb(vma, address, pte);
> > + update_mmu_cache(vma, address, (pte_t *)pmdp);
> > }
> >
> > /*
> > diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
> > index 53dfa2b9316b..e5722cd8dd6d 100644
> > --- a/arch/mips/mm/tlb-r3k.c
> > +++ b/arch/mips/mm/tlb-r3k.c
> > @@ -176,7 +176,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
> > }
> > }
> >
> > -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
> > +void update_mmu_cache(struct vm_area_struct *vma,
> > + unsigned long address, pte_t *ptep)
> > {
> > unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
> > unsigned long flags;
> > @@ -203,7 +204,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte)
> > BARRIER;
> > tlb_probe();
> > idx = read_c0_index();
> > - write_c0_entrylo0(pte_val(pte));
> > + write_c0_entrylo0(pte_val(*ptep));
> > write_c0_entryhi(address | pid);
> > if (idx < 0) { /* BARRIER */
> > tlb_write_random();
> > diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
> > index 1b939abbe4ca..c96725d17cab 100644
> > --- a/arch/mips/mm/tlb-r4k.c
> > +++ b/arch/mips/mm/tlb-r4k.c
> > @@ -290,14 +290,14 @@ void local_flush_tlb_one(unsigned long page)
> > * updates the TLB with the new pte(s), and another which also checks
> > * for the R4k "end of page" hardware bug and does the needy.
> > */
> > -void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
> > +void update_mmu_cache(struct vm_area_struct *vma,
> > + unsigned long address, pte_t *ptep)
> > {
> > unsigned long flags;
> > pgd_t *pgdp;
> > p4d_t *p4dp;
> > pud_t *pudp;
> > pmd_t *pmdp;
> > - pte_t *ptep;
> > int idx, pid;
> >
> > /*
> > @@ -326,10 +326,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
> > idx = read_c0_index();
> > #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
> > /* this could be a huge page */
> > - if (pmd_huge(*pmdp)) {
> > + if (ptep == (pte_t *)pmdp) {
> > unsigned long lo;
> > write_c0_pagemask(PM_HUGE_MASK);
> > - ptep = (pte_t *)pmdp;
> > lo = pte_to_entrylo(pte_val(*ptep));
> > write_c0_entrylo0(lo);
> > write_c0_entrylo1(lo + (HPAGE_SIZE >> 7));
> > @@ -344,8 +343,6 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
> > } else
> > #endif
> > {
> > - ptep = pte_offset_map(pmdp, address);
> > -
> > #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
> > #ifdef CONFIG_XPA
> > write_c0_entrylo0(pte_to_entrylo(ptep->pte_high));
> > --
> > 2.35.3
> >
>
> I just bisected a crash while powering down a MIPS machine in QEMU to
> this change as commit 8044511d3893 ("mips: update_mmu_cache() can
> replace __update_tlb()") in linux-next.

Thank you, Nathan, that's very helpful indeed. This patch certainly knew
that it wanted testing, and I'm glad to hear that it is now seeing some.

While powering down? The messages below look like it was just coming up,
but no doubt that's because you were bisecting (or because I'm unfamiliar
with what messages to expect there). It's probably irrelevant information,
but I wonder whether the (V)machine worked well enough for a while before
you first powered down and spotted the problem, or whether it's never got
much further than trying to run init (busybox)? I'm trying to get a feel
for whether the problem occurs under common or uncommon conditions.

> Unfortunately, I can still
> reproduce it with the existing fix you have for this change on the
> mailing list, which is present in next-20230614.

Right, that later fix was only for a build warning, nothing functional
(or at least I hoped that it wasn't making any functional difference).

Thanks a lot for the detailed instructions below: unfortunately, those
would draw me into a realm of testing I've never needed to enter before,
so a lot of time spent on setup and learning. Usually, I just stare at
the source.

What this probably says is that I should revert most my cleanup there,
and keep as close to the existing code as possible. But some change is
needed, and I may need to understand (or have a good guess at) what was
going wrong, to decide what kind of retreat will be successful.

Back to the source for a while: I hope I'll find examples in nearby MIPS
kernel source (and git history), which will hint at the right way forward.
Then send you a patch against next-20230614 to try, when I'm reasonably
confident that it's enough to satisfy my purpose, but likely not to waste
your time.

Thanks, until later,
Hugh

>
> I can reproduce it with the GCC 13.1.0 on kernel.org [1].
>
> $ make -skj"$(nproc)" ARCH=mips CROSS_COMPILE=mips-linux- mrproper malta_defconfig vmlinux
>
> $ qemu-system-mipsel \
> -display none \
> -nodefaults \
> -cpu 24Kf \
> -machine malta \
> -kernel vmlinux \
> -initrd rootfs.cpio \
> -m 512m \
> -serial mon:stdio
> ...
> Linux version 6.4.0-rc6-next-20230614 ([email protected]) (mips-linux-gcc (GCC) 13.1.0, GNU ld (GNU Binutils) 2.40) #1 SMP Wed Jun 14 16:13:02 MST 2023
> ...
> Run /init as init process
> process '/bin/busybox' started with executable stack
> do_page_fault(): sending SIGSEGV to init for invalid read access from 0000003c
> epc = 77b893dc in ld-uClibc-1.0.39.so[77b84000+8000]
> ra = 77b8930c in ld-uClibc-1.0.39.so[77b84000+8000]
> Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>
> The rootfs is available at [2] if it is needed. I am more than happy to
> provide additional information or test patches if necessary.
>
> [1]: https://mirrors.edge.kernel.org/pub/tools/crosstool/
> [2]: https://github.com/ClangBuiltLinux/boot-utils/releases/download/20230609-194440/mipsel-rootfs.cpio.zst
>
> Cheers,
> Nathan

2023-06-15 05:48:27

by Hugh Dickins

[permalink] [raw]

Subject: Re: [PATCH v2 07/23] mips: update_mmu_cache() can replace __update_tlb()

On Wed, 14 Jun 2023, Hugh Dickins wrote:
> On Wed, 14 Jun 2023, Nathan Chancellor wrote:
> >
> > I just bisected a crash while powering down a MIPS machine in QEMU to
> > this change as commit 8044511d3893 ("mips: update_mmu_cache() can
> > replace __update_tlb()") in linux-next.
>
> Thank you, Nathan, that's very helpful indeed. This patch certainly knew
> that it wanted testing, and I'm glad to hear that it is now seeing some.
>
> While powering down? The messages below look like it was just coming up,
> but no doubt that's because you were bisecting (or because I'm unfamiliar
> with what messages to expect there). It's probably irrelevant information,
> but I wonder whether the (V)machine worked well enough for a while before
> you first powered down and spotted the problem, or whether it's never got
> much further than trying to run init (busybox)? I'm trying to get a feel
> for whether the problem occurs under common or uncommon conditions.
>
> > Unfortunately, I can still
> > reproduce it with the existing fix you have for this change on the
> > mailing list, which is present in next-20230614.
>
> Right, that later fix was only for a build warning, nothing functional
> (or at least I hoped that it wasn't making any functional difference).
>
> Thanks a lot for the detailed instructions below: unfortunately, those
> would draw me into a realm of testing I've never needed to enter before,
> so a lot of time spent on setup and learning. Usually, I just stare at
> the source.
>
> What this probably says is that I should revert most my cleanup there,
> and keep as close to the existing code as possible. But some change is
> needed, and I may need to understand (or have a good guess at) what was
> going wrong, to decide what kind of retreat will be successful.
>
> Back to the source for a while: I hope I'll find examples in nearby MIPS
> kernel source (and git history), which will hint at the right way forward.
> Then send you a patch against next-20230614 to try, when I'm reasonably
> confident that it's enough to satisfy my purpose, but likely not to waste
> your time.

I'm going to take advantage of your good nature by attaching
two alternative patches, either to go on top of next-20230614.

mips1.patch,
arch/mips/mm/tlb-r4k.c | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)

is by far my favourite. I couldn't see anything wrong with what's
already there for mips, but it seems possible that (though I didn't
find it) somewhere calls update_mmu_cache_pmd() on a page table. So
mips1.patch restores the pmd_huge() check, and cleans up further by
removing the silly pgdp, p4dp, pudp, pmdp stuff: the pointer has now
been passed in by the caller, why walk the tree again? I should have
done it this way before.

But if that doesn't work, then I'm afraid it will have to be
mips2.patch,
arch/mips/include/asm/pgtable.h | 15 ++++++++++++---
arch/mips/mm/tlb-r3k.c | 5 ++---
arch/mips/mm/tlb-r4k.c | 27 ++++++++++++++++++---------
3 files changed, 32 insertions(+), 15 deletions(-)

which reverts all of the original patch and its build warning fix,
and does a pte_unmap() to balance the silly pte_offset_map() there;
with an apologetic comment for this being about the only place in
the tree where I have no idea what to do if ptep were NULL.

I do hope that you find the first fixes the breakage; but if not, then
I even more fervently hope that the second will, despite my hating it.
Touch wood for the first, fingers crossed for the second, thanks,

Hugh

Attachments:

mips1.patch (900.00 B)
mips2.patch (3.83 kB)
Download all attachments

2023-06-15 16:04:40

by Nathan Chancellor

[permalink] [raw]

Subject: Re: [PATCH v2 07/23] mips: update_mmu_cache() can replace __update_tlb()

On Wed, Jun 14, 2023 at 10:43:30PM -0700, Hugh Dickins wrote:
> On Wed, 14 Jun 2023, Hugh Dickins wrote:
> > On Wed, 14 Jun 2023, Nathan Chancellor wrote:
> > >
> > > I just bisected a crash while powering down a MIPS machine in QEMU to
> > > this change as commit 8044511d3893 ("mips: update_mmu_cache() can
> > > replace __update_tlb()") in linux-next.
> >
> > Thank you, Nathan, that's very helpful indeed. This patch certainly knew
> > that it wanted testing, and I'm glad to hear that it is now seeing some.
> >
> > While powering down? The messages below look like it was just coming up,
> > but no doubt that's because you were bisecting (or because I'm unfamiliar
> > with what messages to expect there). It's probably irrelevant information,
> > but I wonder whether the (V)machine worked well enough for a while before
> > you first powered down and spotted the problem, or whether it's never got
> > much further than trying to run init (busybox)? I'm trying to get a feel
> > for whether the problem occurs under common or uncommon conditions.

Ugh sorry, I have been looking into too many bugs lately and got my
wires crossed :) this is indeed a problem when running init (which is
busybox, this is a simple Buildroot file system).

> > > Unfortunately, I can still
> > > reproduce it with the existing fix you have for this change on the
> > > mailing list, which is present in next-20230614.
> >
> > Right, that later fix was only for a build warning, nothing functional
> > (or at least I hoped that it wasn't making any functional difference).
> >
> > Thanks a lot for the detailed instructions below: unfortunately, those
> > would draw me into a realm of testing I've never needed to enter before,
> > so a lot of time spent on setup and learning. Usually, I just stare at
> > the source.
> >
> > What this probably says is that I should revert most my cleanup there,
> > and keep as close to the existing code as possible. But some change is
> > needed, and I may need to understand (or have a good guess at) what was
> > going wrong, to decide what kind of retreat will be successful.
> >
> > Back to the source for a while: I hope I'll find examples in nearby MIPS
> > kernel source (and git history), which will hint at the right way forward.
> > Then send you a patch against next-20230614 to try, when I'm reasonably
> > confident that it's enough to satisfy my purpose, but likely not to waste
> > your time.
>
> I'm going to take advantage of your good nature by attaching
> two alternative patches, either to go on top of next-20230614.
>
> mips1.patch,
> arch/mips/mm/tlb-r4k.c | 12 +-----------
> 1 file changed, 1 insertion(+), 11 deletions(-)
>
> is by far my favourite. I couldn't see anything wrong with what's
> already there for mips, but it seems possible that (though I didn't
> find it) somewhere calls update_mmu_cache_pmd() on a page table. So
> mips1.patch restores the pmd_huge() check, and cleans up further by
> removing the silly pgdp, p4dp, pudp, pmdp stuff: the pointer has now
> been passed in by the caller, why walk the tree again? I should have
> done it this way before.
>
> But if that doesn't work, then I'm afraid it will have to be
> mips2.patch,
> arch/mips/include/asm/pgtable.h | 15 ++++++++++++---
> arch/mips/mm/tlb-r3k.c | 5 ++---
> arch/mips/mm/tlb-r4k.c | 27 ++++++++++++++++++---------
> 3 files changed, 32 insertions(+), 15 deletions(-)
>
> which reverts all of the original patch and its build warning fix,
> and does a pte_unmap() to balance the silly pte_offset_map() there;
> with an apologetic comment for this being about the only place in
> the tree where I have no idea what to do if ptep were NULL.
>
> I do hope that you find the first fixes the breakage; but if not, then

I hate to be the bearer of bad news but the first patch did not fix the
breakage, I see the same issue.

> I even more fervently hope that the second will, despite my hating it.
> Touch wood for the first, fingers crossed for the second, thanks,

Thankfully, the second one does. Thanks for the quick and thoughtful
responses!

Cheers,
Nathan

2023-06-15 21:43:28

by Hugh Dickins

[permalink] [raw]

Subject: Re: [PATCH v2 07/23] mips: update_mmu_cache() can replace __update_tlb()

On Thu, 15 Jun 2023, Nathan Chancellor wrote:
> On Wed, Jun 14, 2023 at 10:43:30PM -0700, Hugh Dickins wrote:
> >
> > I do hope that you find the first fixes the breakage; but if not, then
>
> I hate to be the bearer of bad news but the first patch did not fix the
> breakage, I see the same issue.

Boo!

>
> > I even more fervently hope that the second will, despite my hating it.
> > Touch wood for the first, fingers crossed for the second, thanks,
>
> Thankfully, the second one does. Thanks for the quick and thoughtful
> responses!

Hurrah!

Thanks a lot, Nathan. I'll set aside my disappointment and curiosity,
clearly I'm not going to have much of a future as a MIPS programmer.

I must take a break, then rush Andrew the second patch, well, not
exactly that second patch, since most of that is revert: I'll just
send the few lines of replacement patch (with a new Subject line, as
update_mmu_cache() goes back to being separate from __update_tlb()).

Unless you object, I'll include a Tested-by: you. I realize that
your testing is limited to seeing it running; but that's true of
most of the testing at this stage - it gets to be more interesting
when the patch that adds the rcu_read_lock() and rcu_read_unlock()
is added on top later.

Thanks again,
Hugh

2023-06-15 22:15:29

by Yu Zhao

[permalink] [raw]

Subject: Re: [PATCH v2 07/23] mips: update_mmu_cache() can replace __update_tlb()

On Wed, Jun 14, 2023 at 04:17:58PM -0700, Nathan Chancellor wrote:
> Hi Hugh,
>
> On Thu, Jun 08, 2023 at 12:17:24PM -0700, Hugh Dickins wrote:
> > Don't make update_mmu_cache() a wrapper around __update_tlb(): call it
> > directly, and use the ptep (or pmdp) provided by the caller, instead of
> > re-calling pte_offset_map() - which would raise a question of whether a
> > pte_unmap() is needed to balance it.
> >
> > Check whether the "ptep" provided by the caller is actually the pmdp,
> > instead of testing pmd_huge(): or test pmd_huge() too and warn if it
> > disagrees? This is "hazardous" territory: needs review and testing.
> >
> > Signed-off-by: Hugh Dickins <[email protected]>
> > ---
> > arch/mips/include/asm/pgtable.h | 15 +++------------
> > arch/mips/mm/tlb-r3k.c | 5 +++--
> > arch/mips/mm/tlb-r4k.c | 9 +++------
> > 3 files changed, 9 insertions(+), 20 deletions(-)
> >
>
> I just bisected a crash while powering down a MIPS machine in QEMU to
> this change as commit 8044511d3893 ("mips: update_mmu_cache() can
> replace __update_tlb()") in linux-next. Unfortunately, I can still
> reproduce it with the existing fix you have for this change on the
> mailing list, which is present in next-20230614.
>
> I can reproduce it with the GCC 13.1.0 on kernel.org [1].
>
> $ make -skj"$(nproc)" ARCH=mips CROSS_COMPILE=mips-linux- mrproper malta_defconfig vmlinux
>
> $ qemu-system-mipsel \
> -display none \
> -nodefaults \
> -cpu 24Kf \
> -machine malta \
> -kernel vmlinux \
> -initrd rootfs.cpio \
> -m 512m \
> -serial mon:stdio
> ...
> Linux version 6.4.0-rc6-next-20230614 ([email protected]) (mips-linux-gcc (GCC) 13.1.0, GNU ld (GNU Binutils) 2.40) #1 SMP Wed Jun 14 16:13:02 MST 2023
> ...
> Run /init as init process
> process '/bin/busybox' started with executable stack
> do_page_fault(): sending SIGSEGV to init for invalid read access from 0000003c
> epc = 77b893dc in ld-uClibc-1.0.39.so[77b84000+8000]
> ra = 77b8930c in ld-uClibc-1.0.39.so[77b84000+8000]
> Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
>
> The rootfs is available at [2] if it is needed. I am more than happy to
> provide additional information or test patches if necessary.
>
> [1]: https://mirrors.edge.kernel.org/pub/tools/crosstool/
> [2]: https://github.com/ClangBuiltLinux/boot-utils/releases/download/20230609-194440/mipsel-rootfs.cpio.zst

Seeing this on real h/w as well (just to confirm).

Linux version 6.4.0-rc4-00437-g4bab5c42a698 ([email protected]) (mips64el-linux-gnuabi64-gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40) #3 SMP PREEMPT Thu Jun 15 01:05:20 MDT 2023
Skipping L2 locking due to reduced L2 cache size
CVMSEG size: 2 cache lines (256 bytes)
printk: bootconsole [early0] enabled
CPU0 revision is: 000d9602 (Cavium Octeon III)
FPU revision is: 00739600
Kernel sections are not in the memory maps
Wasting 243712 bytes for tracking 4352 unused pages
Initrd not found or empty - disabling initrd
Using passed Device Tree.
software IO TLB: SWIOTLB bounce buffer size adjusted to 0MB
software IO TLB: area num 1.
software IO TLB: mapped [mem 0x000000000370d000-0x000000000374d000] (0MB)
Primary instruction cache 78kB, virtually tagged, 39 way, 16 sets, linesize 128 bytes.
Primary data cache 32kB, 32-way, 8 sets, linesize 128 bytes.
Zone ranges:
DMA32 [mem 0x0000000001100000-0x00000000efffffff]
Normal empty
Movable zone start for each node
Early memory node ranges
node 0: [mem 0x0000000001100000-0x0000000003646fff]
node 0: [mem 0x0000000003700000-0x000000000fafffff]
node 0: [mem 0x0000000020000000-0x000000004ebfffff]
Initmem setup node 0 [mem 0x0000000001100000-0x000000004ebfffff]
On node 0, zone DMA32: 4352 pages in unavailable ranges
On node 0, zone DMA32: 185 pages in unavailable ranges
On node 0, zone DMA32: 1280 pages in unavailable ranges
On node 0, zone DMA32: 5120 pages in unavailable ranges
percpu: Embedded 15 pages/cpu s24368 r8192 d28880 u61440
pcpu-alloc: s24368 r8192 d28880 u61440 alloc=15*4096
pcpu-alloc: [0] 0 [0] 1 [0] 2 [0] 3
Kernel command line: loglevel=8 console=ttyS0,115200
printk: log_buf_len individual max cpu contribution: 4096 bytes
printk: log_buf_len total cpu_extra contributions: 12288 bytes
printk: log_buf_len min size: 16384 bytes
printk: log_buf_len: 32768 bytes
printk: early log buf free: 14184(86%)
Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes, linear)
Inode-cache hash table entries: 65536 (order: 7, 524288 bytes, linear)
Built 1 zonelists, mobility grouping on. Total pages: 247772
mem auto-init: stack:all(zero), heap alloc:off, heap free:off
Memory: 950032K/1004828K available (8058K kernel code, 575K rwdata, 1880K rodata, 27488K init, 158K bss, 54796K reserved, 0K cma-reserved)
rcu: Preemptible hierarchical RCU implementation.
rcu: RCU event tracing is enabled.
rcu: RCU restricting CPUs from NR_CPUS=32 to nr_cpu_ids=4.
rcu: RCU calculated value of scheduler-enlistment delay is 10 jiffies.
rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
NR_IRQS: 512
CIB interrupt controller probed: 800107000000e000 23
CIB interrupt controller probed: 800107000000e200 12
CIB interrupt controller probed: 800107000000e400 6
CIB interrupt controller probed: 800107000000ec00 15
CIB interrupt controller probed: 800107000000e600 4
CIB interrupt controller probed: 800107000000e800 11
CIB interrupt controller probed: 800107000000e900 11
rcu: srcu_init: Setting srcu_struct sizes based on contention.
clocksource: OCTEON_CVMCOUNT: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
Calibrating delay loop (skipped) preset value.. 2000.00 BogoMIPS (lpj=10000000)
pid_max: default: 32768 minimum: 301
LSM: initializing lsm=capability,integrity
Mount-cache hash table entries: 2048 (order: 2, 16384 bytes, linear)
Mountpoint-cache hash table entries: 2048 (order: 2, 16384 bytes, linear)
rcu: Hierarchical SRCU implementation.
rcu: Max phase no-delay instances is 1000.
smp: Bringing up secondary CPUs ...
SMP: Booting CPU01 (CoreId 1)...
CPU1 revision is: 000d9602 (Cavium Octeon III)
FPU revision is: 00739600
SMP: Booting CPU02 (CoreId 2)...
CPU2 revision is: 000d9602 (Cavium Octeon III)
FPU revision is: 00739600
SMP: Booting CPU03 (CoreId 3)...
CPU3 revision is: 000d9602 (Cavium Octeon III)
FPU revision is: 00739600
smp: Brought up 1 node, 4 CPUs
devtmpfs: initialized
clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604462750000 ns
futex hash table entries: 1024 (order: 5, 131072 bytes, linear)
NET: Registered PF_NETLINK/PF_ROUTE protocol family
PCIe: Initializing port 0
PCIe: BIST2 FAILED for port 0 (0x0000000000000003)
PCIe: Link timeout on port 0, probably the slot is empty
PCIe: Initializing port 1
PCIe: BIST FAILED for port 1 (0xffffffffffffffff)
PCIe: Link timeout on port 1, probably the slot is empty
HugeTLB: registered 2.00 MiB page size, pre-allocated 0 pages
HugeTLB: 0 KiB vmemmap can be freed for a 2.00 MiB page
SCSI subsystem initialized
libata version 3.00 loaded.
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
pps_core: LinuxPPS API ver. 1 registered
pps_core: Software ver. 5.3.6 - Copyright 2005-2007 Rodolfo Giometti <[email protected]>
PTP clock support registered
EDAC MC: Ver: 3.0.0
PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [mem 0x1000000000000]
pci_bus 0000:00: root bus resource [io 0x0000]
pci_bus 0000:00: No busn resource found for root bus, will use [bus 00-ff]
pci_bus 0000:00: busn_res: [bus 00-ff] end is updated to 00
vgaarb: loaded
clocksource: Switched to clocksource OCTEON_CVMCOUNT
NET: Registered PF_INET protocol family
IP idents hash table entries: 16384 (order: 5, 131072 bytes, linear)
tcp_listen_portaddr_hash hash table entries: 512 (order: 1, 8192 bytes, linear)
Table-perturb hash table entries: 65536 (order: 6, 262144 bytes, linear)
TCP established hash table entries: 8192 (order: 4, 65536 bytes, linear)
TCP bind hash table entries: 8192 (order: 6, 262144 bytes, linear)
TCP: Hash tables configured (established 8192 bind 8192)
UDP hash table entries: 512 (order: 2, 16384 bytes, linear)
UDP-Lite hash table entries: 512 (order: 2, 16384 bytes, linear)
NET: Registered PF_UNIX/PF_LOCAL protocol family
RPC: Registered named UNIX socket transport module.
RPC: Registered udp transport module.
RPC: Registered tcp transport module.
RPC: Registered tcp NFSv4.1 backchannel transport module.
PCI: CLS 0 bytes, default 128
platform 1180068000000.uctl: clocks initialized.
platform 1180069000000.uctl: clocks initialized.
Starting KVM with MIPS VZ extensions
workingset: timestamp_bits=62 max_order=18 bucket_order=0
NFS: Registering the id_resolver key type
Key type id_resolver registered
Key type id_legacy registered
nfs4filelayout_init: NFSv4 File Layout Driver Registering...
nfs4flexfilelayout_init: NFSv4 Flexfile Layout Driver Registering...
io scheduler mq-deadline registered
io scheduler kyber registered
io scheduler bfq registered
gpio gpiochip0: Static allocation of GPIO base is deprecated, use dynamic allocation.
octeon_gpio 1070000000800.gpio-controller: OCTEON GPIO driver probed.
Serial: 8250/16550 driver, 2 ports, IRQ sharing disabled
printk: console [ttyS0] disabled
1180000000800.serial: ttyS0 at MMIO 0x1180000000800 (irq = 34, base_baud = 25000000) is a OCTEON
printk: console [ttyS0] enabled
printk: console [ttyS0] enabled
printk: bootconsole [early0] disabled
printk: bootconsole [early0] disabled
1180000000c00.serial: ttyS1 at MMIO 0x1180000000c00 (irq = 35, base_baud = 25000000) is a OCTEON
loop: module loaded
Driver 'pata_octeon_cf' needs updating - please use bus_type methods
slram: not enough parameters.
spi-octeon 1070000001000.spi: OCTEON SPI bus driver
process '/bin/kmod' started with executable stack
do_page_fault(): sending SIGSEGV to modprobe for invalid read access from 0000000000000298
epc = 000000fff3346470 in ld.so.1[fff3328000+2e000]
ra = 000000fff33456d0 in ld.so.1[fff3328000+2e000]
do_page_fault(): sending SIGSEGV to modprobe for invalid read access from 0000000000000298
epc = 000000fff3c78470 in ld.so.1[fff3c5a000+2e000]
ra = 000000fff3c776d0 in ld.so.1[fff3c5a000+2e000]
do_page_fault(): sending SIGSEGV to modprobe for invalid read access from 0000000000021da8
epc = 000000fff35aa2c0 in ld.so.1[fff358d000+2e000]
ra = 000000fff35aa688 in ld.so.1[fff358d000+2e000]
do_page_fault(): sending SIGSEGV to modprobe for invalid read access from 0000000000000298
epc = 000000fff34cc470 in ld.so.1[fff34ae000+2e000]
ra = 000000fff34cb6d0 in ld.so.1[fff34ae000+2e000]
mdio_octeon 1180000001800.mdio: Probed
mdio_octeon 1180000001900.mdio: Probed
dwc3 1680000000000.xhci: Configuration mismatch. dr_mode forced to host
dwc3 1690000000000.xhci: Configuration mismatch. dr_mode forced to host
xhci-hcd xhci-hcd.0.auto: xHCI Host Controller
xhci-hcd xhci-hcd.0.auto: new USB bus registered, assigned bus number 1
xhci-hcd xhci-hcd.0.auto: hcc params 0x0220f06d hci version 0x100 quirks 0x0000000002010010
xhci-hcd xhci-hcd.0.auto: irq 25, io mem 0x1680000000000
dwc3 1680000000000.xhci: xhci_plat_probe get usb3phy fail (ret=-6)
xhci-hcd xhci-hcd.0.auto: xHCI Host Controller
xhci-hcd xhci-hcd.0.auto: new USB bus registered, assigned bus number 2
xhci-hcd xhci-hcd.0.auto: Host supports USB 3.0 SuperSpeed
hub 1-0:1.0: USB hub found
hub 1-0:1.0: 1 port detected
usb usb2: We don't know the algorithms for LPM for this host, disabling LPM.
hub 2-0:1.0: USB hub found
hub 2-0:1.0: 1 port detected
xhci-hcd xhci-hcd.1.auto: xHCI Host Controller
xhci-hcd xhci-hcd.1.auto: new USB bus registered, assigned bus number 3
xhci-hcd xhci-hcd.1.auto: hcc params 0x0220f06d hci version 0x100 quirks 0x0000000002010010
xhci-hcd xhci-hcd.1.auto: irq 26, io mem 0x1690000000000
dwc3 1690000000000.xhci: xhci_plat_probe get usb3phy fail (ret=-6)
xhci-hcd xhci-hcd.1.auto: xHCI Host Controller
xhci-hcd xhci-hcd.1.auto: new USB bus registered, assigned bus number 4
xhci-hcd xhci-hcd.1.auto: Host supports USB 3.0 SuperSpeed
hub 3-0:1.0: USB hub found
hub 3-0:1.0: 1 port detected
usb usb4: We don't know the algorithms for LPM for this host, disabling LPM.
hub 4-0:1.0: USB hub found
hub 4-0:1.0: 1 port detected
usbcore: registered new interface driver usb-storage
i2c-octeon 1180000001000.i2c: probed
i2c-octeon 1180000001200.i2c: probed
octeon_wdt: Initial granularity 5 Sec
EDAC DEVICE0: Giving out device to module octeon-cpu controller cache: DEV octeon_pc_edac (INTERRUPT)
EDAC DEVICE1: Giving out device to module octeon-l2c controller octeon_l2c_err: DEV octeon_l2c_edac (POLLED)
octeon_lmc_edac octeon_lmc_edac.0: Disabled (ECC not enabled)
Interface 0 has 4 ports (SGMII)
Interface 1 has 4 ports (SGMII)
Interface 3 has 4 ports (LOOP)
NET: Registered PF_INET6 protocol family
Segment Routing with IPv6
In-situ OAM (IOAM) with IPv6
sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver
NET: Registered PF_PACKET protocol family
Key type dns_resolver registered
OF: fdt: not creating '/sys/firmware/fdt': CRC check failed
Freeing unused kernel image (initmem) memory: 27488K
This architecture does not have kernel memory protection.
Run /init as init process
with arguments:
/init
with environment:
HOME=/
TERM=linux
do_page_fault(): sending SIGSEGV to init for invalid read access from 0000000000021da8
epc = 000000fff3a542c0 in ld.so.1[fff3a37000+2e000]
ra = 000000fff3a54688 in ld.so.1[fff3a37000+2e000]
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

2023-06-15 23:20:34

by Hugh Dickins

[permalink] [raw]

Subject: [PATCH v2 07/23 replacement] mips: add pte_unmap() to balance pte_offset_map()

To keep balance in future, __update_tlb() remember to pte_unmap() after
pte_offset_map(). This is an odd case, since the caller has already done
pte_offset_map_lock(), then mips forgets the address and recalculates it;
but my two naive attempts to clean that up did more harm than good.

Tested-by: Nathan Chancellor <[email protected]>
Signed-off-by: Hugh Dickins <[email protected]>
---
Andrew, please replace my mips patch, and its build warning fix patch,
in mm-unstable by this less ambitious but working replacement - thanks.

arch/mips/mm/tlb-r4k.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 1b939abbe4ca..93c2d695588a 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -297,7 +297,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
p4d_t *p4dp;
pud_t *pudp;
pmd_t *pmdp;
- pte_t *ptep;
+ pte_t *ptep, *ptemap = NULL;
int idx, pid;

/*
@@ -344,7 +344,12 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
} else
#endif
{
- ptep = pte_offset_map(pmdp, address);
+ ptemap = ptep = pte_offset_map(pmdp, address);
+ /*
+ * update_mmu_cache() is called between pte_offset_map_lock()
+ * and pte_unmap_unlock(), so we can assume that ptep is not
+ * NULL here: and what should be done below if it were NULL?
+ */

#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
#ifdef CONFIG_XPA
@@ -373,6 +378,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
tlbw_use_hazard();
htw_start();
flush_micro_tlb_vm(vma);
+
+ if (ptemap)
+ pte_unmap(ptemap);
local_irq_restore(flags);
}

--
2.35.3

2023-06-17 04:10:09

by Yu Zhao

[permalink] [raw]

Subject: Re: [PATCH v2 07/23 replacement] mips: add pte_unmap() to balance pte_offset_map()

On Thu, Jun 15, 2023 at 04:02:43PM -0700, Hugh Dickins wrote:
> To keep balance in future, __update_tlb() remember to pte_unmap() after
> pte_offset_map(). This is an odd case, since the caller has already done
> pte_offset_map_lock(), then mips forgets the address and recalculates it;
> but my two naive attempts to clean that up did more harm than good.
>
> Tested-by: Nathan Chancellor <[email protected]>
> Signed-off-by: Hugh Dickins <[email protected]>

FWIW: Tested-by: Yu Zhao <[email protected]>

There is another problem, likely caused by khugepaged, happened multiple times. But I don't think it's related to your series, just FYI.

Got mcheck at ffffffff81134ef0
CPU: 3 PID: 36 Comm: khugepaged Not tainted 6.4.0-rc6-00049-g62d8779610bb-dirty #1
$ 0 : 0000000000000000 0000000000000014 40000000011ac004 4000000000000000
$ 4 : c000000000000000 0000000000000045 000000011a80045b 000000011a80045b
$ 8 : 8000000080188000 ffffffff81b526c0 0000000000000200 0000000000000000
$12 : 0000000000000028 ffffffff81910cb4 0000000000000000 0000000000000207
$16 : 000000aaab800000 80000000037ee990 ffffffff81b50200 8000000005066ae0
$20 : 0000000000000001 ffffffff80000000 ffffffff81c10000 000000aaab800000
$24 : 0000000000000002 ffffffff812b75f8
$28 : 8000000002310000 8000000002313b00 ffffffff81b50000 ffffffff81134d88
Hi : 000000000000017a
Lo : 0000000000000000
epc : ffffffff81134ef0 __update_tlb+0x260/0x2a0
ra : ffffffff81134d88 __update_tlb+0xf8/0x2a0
Status: 14309ce2 KX SX UX KERNEL EXL
Cause : 00800060 (ExcCode 18)
PrId : 000d9602 (Cavium Octeon III)
CPU: 3 PID: 36 Comm: khugepaged Not tainted 6.4.0-rc6-00049-g62d8779610bb-dirty #1
Stack : 0000000000000001 0000000000000000 0000000000000008 8000000002313768
8000000002313768 80000000023138f8 0000000000000000 0000000000000000
a6c8cd76e1667e00 8000000001db4f28 0000000000000001 30302d3663722d30
643236672d393430 0000000000000010 ffffffff81910cc0 0000000000000000
8000000001d96bcc 0000000000000000 0000000000000000 ffffffff81a68ed0
ffffffff81b50000 0000000000000001 ffffffff80000000 ffffffff81c10000
000000aaab800000 0000000000000002 ffffffff815b78c0 ffffffffa184e710
00000000000000c0 8000000002310000 8000000002313760 ffffffff81b50000
ffffffff8111c9cc 0000000000000000 0000000000000000 0000000000000000
0000000000000000 0000000000000000 ffffffff8111c9ec 0000000000000000
...
Call Trace:
[<ffffffff8111c9ec>] show_stack+0x64/0x158
[<ffffffff81920078>] dump_stack_lvl+0x5c/0x7c
[<ffffffff8111e03c>] do_mcheck+0x2c/0x98
[<ffffffff81118608>] handle_mcheck_int+0x38/0x50

Index : 80000000
PageMask : 1fe000
EntryHi : 000000aaab8000bd
EntryLo0 : 40000000011a8004
EntryLo1 : 40000000011ac004
Wired : 0
PageGrain: e8000000

Index: 2 pgmask=4kb va=c00000feffff4000 asid=b9
[ri=0 xi=0 pa=000022a7000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=000022af000 c=0 d=1 v=1 g=1]
Index: 3 pgmask=4kb va=c00000feffff8000 asid=b9
[ri=0 xi=0 pa=00002380000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00002381000 c=0 d=1 v=1 g=1]
Index: 4 pgmask=4kb va=c00000feffffa000 asid=b9
[ri=0 xi=0 pa=000023e9000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=000023ea000 c=0 d=1 v=1 g=1]
Index: 5 pgmask=4kb va=c00000feffffe000 asid=b9
[ri=0 xi=0 pa=00002881000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00002882000 c=0 d=1 v=1 g=1]
Index: 6 pgmask=4kb va=c00000fefffb0000 asid=b9
[ri=0 xi=0 pa=00002cc2000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00002cc3000 c=0 d=1 v=1 g=1]
Index: 7 pgmask=4kb va=c00000feffffc000 asid=b9
[ri=0 xi=0 pa=000023eb000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00002880000 c=0 d=1 v=1 g=1]
Index: 8 pgmask=4kb va=c00000feffff6000 asid=b9
[ri=0 xi=0 pa=0000237e000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=0000237f000 c=0 d=1 v=1 g=1]
Index: 14 pgmask=4kb va=c00000fefff62000 asid=8e
[ri=0 xi=0 pa=00007477000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=0000745e000 c=0 d=1 v=1 g=1]
Index: 15 pgmask=4kb va=c00000fefff52000 asid=8e
[ri=0 xi=0 pa=0000744c000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=0000616d000 c=0 d=1 v=1 g=1]
Index: 16 pgmask=4kb va=c00000fefff42000 asid=8e
[ri=0 xi=0 pa=00006334000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=0000616b000 c=0 d=1 v=1 g=1]
Index: 19 pgmask=4kb va=c00000fefffb6000 asid=8e
[ri=0 xi=0 pa=00005050000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00005051000 c=0 d=1 v=1 g=1]
Index: 20 pgmask=4kb va=c00000fefff72000 asid=b9
[ri=0 xi=0 pa=00007504000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00007503000 c=0 d=1 v=1 g=1]
Index: 58 pgmask=4kb va=c00000fefffaa000 asid=8e
[ri=0 xi=0 pa=00005126000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00005127000 c=0 d=1 v=1 g=1]
Index: 59 pgmask=4kb va=c00000fefffba000 asid=8e
[ri=0 xi=0 pa=00005129000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=0000512a000 c=0 d=1 v=1 g=1]
Index: 79 pgmask=4kb va=c000000000060000 asid=8e
[ri=0 xi=0 pa=0000534b000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=000062f9000 c=0 d=1 v=1 g=1]
Index: 80 pgmask=4kb va=c00000000005e000 asid=8e
[ri=0 xi=0 pa=00000000000 c=0 d=0 v=0 g=1] [ri=0 xi=0 pa=00004013000 c=0 d=1 v=1 g=1]
Index: 81 pgmask=4kb va=c0000000003a0000 asid=8e
[ri=0 xi=0 pa=000060c6000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=0000340e000 c=0 d=1 v=1 g=1]
Index: 82 pgmask=4kb va=c00000000039e000 asid=8e
[ri=0 xi=0 pa=00000000000 c=0 d=0 v=0 g=1] [ri=0 xi=0 pa=000060c5000 c=0 d=1 v=1 g=1]
Index: 83 pgmask=4kb va=c00000000003e000 asid=8e
[ri=0 xi=0 pa=00002bf3000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00002c42000 c=0 d=1 v=1 g=1]
Index: 84 pgmask=4kb va=c000000000042000 asid=8e
[ri=0 xi=0 pa=00002c45000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00002c46000 c=0 d=1 v=1 g=1]
Index: 85 pgmask=4kb va=0aaab820000 asid=bd
[ri=0 xi=0 pa=00000000000 c=0 d=0 v=0 g=0] [ri=0 xi=0 pa=00000000000 c=0 d=0 v=0 g=0]
Index: 86 pgmask=4kb va=0aaab748000 asid=bd
[ri=0 xi=1 pa=0003c959000 c=0 d=1 v=1 g=0] [ri=0 xi=1 pa=0000f7b6000 c=0 d=0 v=1 g=0]
Index: 87 pgmask=4kb va=0fff37c4000 asid=bd
[ri=0 xi=0 pa=0000bd23000 c=0 d=0 v=1 g=0] [ri=0 xi=0 pa=0000bd24000 c=0 d=0 v=1 g=0]
Index: 88 pgmask=4kb va=0fff3992000 asid=bd
[ri=0 xi=1 pa=0000bfcd000 c=0 d=0 v=1 g=0] [ri=0 xi=1 pa=0002977b000 c=0 d=0 v=1 g=0]
Index: 89 pgmask=4kb va=0fff3288000 asid=bd
[ri=0 xi=0 pa=00032b62000 c=0 d=0 v=1 g=0] [ri=0 xi=0 pa=00032b63000 c=0 d=0 v=1 g=0]
Index: 90 pgmask=4kb va=0fff3982000 asid=bd
[ri=0 xi=1 pa=0002d6a3000 c=0 d=1 v=1 g=0] [ri=0 xi=1 pa=0002a423000 c=0 d=0 v=1 g=0]
Index: 91 pgmask=4kb va=0fffbb5e000 asid=bd
[ri=0 xi=0 pa=00028949000 c=0 d=1 v=1 g=0] [ri=0 xi=0 pa=00035060000 c=0 d=1 v=1 g=0]
Index: 92 pgmask=4kb va=c00000fefffe2000 asid=8e
[ri=0 xi=0 pa=000020f0000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=000020ff000 c=0 d=1 v=1 g=1]
Index: 93 pgmask=4kb va=c00000fefffd2000 asid=8e
[ri=0 xi=0 pa=000020b7000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=000020fe000 c=0 d=1 v=1 g=1]
Index: 94 pgmask=4kb va=c00000fefffc2000 asid=8e
[ri=0 xi=0 pa=000020b6000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=000020fd000 c=0 d=1 v=1 g=1]
Index: 110 pgmask=4kb va=c00000feffff2000 asid=bc
[ri=0 xi=0 pa=000020f1000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00002100000 c=0 d=1 v=1 g=1]
Index: 125 pgmask=4kb va=c00000fefffbe000 asid=bc
[ri=0 xi=0 pa=00005268000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=000053dc000 c=0 d=1 v=1 g=1]
Index: 126 pgmask=4kb va=c00000fefffbc000 asid=bc
[ri=0 xi=0 pa=00005266000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00005267000 c=0 d=1 v=1 g=1]
Index: 188 pgmask=4kb va=c00000fefff76000 asid=bb
[ri=0 xi=0 pa=00007576000 c=0 d=1 v=1 g=1] [ri=0 xi=0 pa=00007577000 c=0 d=1 v=1 g=1]

Code: 1000ff92 00601025 00000000 <42000006> 1000ffb8 00000000 00000000 8f820018 00021238
Kernel panic - not syncing: Caught Machine Check exception - caused by multiple matching entries in the TLB.
---[ end Kernel panic - not syncing: Caught Machine Check exception - caused by multiple matching entries in the TLB. ]---

2023-06-18 21:37:27

by Yu Zhao

[permalink] [raw]

Subject: Re: [PATCH v2 07/23 replacement] mips: add pte_unmap() to balance pte_offset_map()

On Fri, Jun 16, 2023 at 9:54 PM Yu Zhao <[email protected]> wrote:
>
> On Thu, Jun 15, 2023 at 04:02:43PM -0700, Hugh Dickins wrote:
> > To keep balance in future, __update_tlb() remember to pte_unmap() after
> > pte_offset_map(). This is an odd case, since the caller has already done
> > pte_offset_map_lock(), then mips forgets the address and recalculates it;
> > but my two naive attempts to clean that up did more harm than good.
> >
> > Tested-by: Nathan Chancellor <[email protected]>
> > Signed-off-by: Hugh Dickins <[email protected]>
>
> FWIW: Tested-by: Yu Zhao <[email protected]>
>
> There is another problem, likely caused by khugepaged, happened multiple times. But I don't think it's related to your series, just FYI.
>
> Got mcheck at ffffffff81134ef0
> CPU: 3 PID: 36 Comm: khugepaged Not tainted 6.4.0-rc6-00049-g62d8779610bb-dirty #1

...

> Kernel panic - not syncing: Caught Machine Check exception - caused by multiple matching entries in the TLB.

In case anyone plans to try to fix this - the problem goes back to at
least 5.15 stable. My (educated) guess is that nobody complained about
it because all the testing is done in QEMU, which does NOT detect
conflicting TLBs. This means the verification of the fix would need to
be on a real piece of h/w or an updated QEMU.

In target/mips/tcg/sysemu/tlb_helper.c:

static void r4k_fill_tlb(CPUMIPSState *env, int idx)
{
r4k_tlb_t *tlb;
uint64_t mask = env->CP0_PageMask >> (TARGET_PAGE_BITS + 1);

/* XXX: detect conflicting TLBs and raise a MCHECK exception when needed */
...