Received-SPF: pass (google.com: domain of linux-kernel+bounces-42319-linux.lists.archive=gmail.com@vger.kernel.org designates 2604:1380:45e3:2400::1 as permitted sender) client-ip=2604:1380:45e3:2400::1;
Date: Mon, 29 Jan 2024 15:49:30 +0800
From: Jisheng Zhang <jszhang@kernel.org>
To: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: paul.walmsley@sifive.com, palmer@dabbelt.com, aou@eecs.berkeley.edu,
	alexghiti@rivosinc.com, samuel.holland@sifive.com,
	ajones@ventanamicro.com, mchitale@ventanamicro.com,
	dylan@andestech.com, sergey.matyukevich@syntacore.com,
	prabhakar.mahadev-lad.rj@bp.renesas.com, apatel@ventanamicro.com,
	linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] RISC-V: add uniprocessor flush_tlb_range() support
Message-ID: <ZbdYijWK1PnHXn47@xhacker>
References: <20240125062044.63344-1-cuiyunhui@bytedance.com>
Precedence: bulk
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Disposition: inline
In-Reply-To: <20240125062044.63344-1-cuiyunhui@bytedance.com>

On Thu, Jan 25, 2024 at 02:20:44PM +0800, Yunhui Cui wrote:
> Add support for flush_tlb_range() to improve TLB performance for
> UP systems. In order to avoid the mutual inclusion of tlbflush.h
> and hugetlb.h, the UP part is also implemented in tlbflush.c.

Hi Yunhui,

IIRC, Samuel sent a similar patch series a few weeks ago.

https://lore.kernel.org/linux-riscv/20240102220134.3229156-1-samuel.holland@sifive.com/

After that series, do you still need this patch?

Thanks
> 
> Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
> ---
>  arch/riscv/include/asm/tlbflush.h |  61 ++++++----
>  arch/riscv/mm/Makefile            |   2 +-
>  arch/riscv/mm/tlbflush.c          | 195 ++++++++++++++++++------------
>  3 files changed, 156 insertions(+), 102 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
> index 928f096dca21..426f043fb450 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -10,12 +10,21 @@
>  #include <linux/mm_types.h>
>  #include <asm/smp.h>
>  #include <asm/errata_list.h>
> +#include <asm/tlbbatch.h>
>  
>  #define FLUSH_TLB_MAX_SIZE      ((unsigned long)-1)
>  #define FLUSH_TLB_NO_ASID       ((unsigned long)-1)
>  
>  #ifdef CONFIG_MMU
>  extern unsigned long asid_mask;
> +DECLARE_STATIC_KEY_FALSE(use_asid_allocator);
> +
> +struct flush_tlb_range_data {
> +	unsigned long asid;
> +	unsigned long start;
> +	unsigned long size;
> +	unsigned long stride;
> +};
>  
>  static inline void local_flush_tlb_all(void)
>  {
> @@ -27,12 +36,40 @@ static inline void local_flush_tlb_page(unsigned long addr)
>  {
>  	ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
>  }
> +
> +static inline void local_flush_tlb_all_asid(unsigned long asid)
> +{
> +	if (asid != FLUSH_TLB_NO_ASID)
> +		__asm__ __volatile__ ("sfence.vma x0, %0"
> +				:
> +				: "r" (asid)
> +				: "memory");
> +	else
> +		local_flush_tlb_all();
> +}
> +
> +static inline void local_flush_tlb_page_asid(unsigned long addr,
> +		unsigned long asid)
> +{
> +	if (asid != FLUSH_TLB_NO_ASID)
> +		__asm__ __volatile__ ("sfence.vma %0, %1"
> +				:
> +				: "r" (addr), "r" (asid)
> +				: "memory");
> +	else
> +		local_flush_tlb_page(addr);
> +}
> +
> +static inline unsigned long get_mm_asid(struct mm_struct *mm)
> +{
> +	return static_branch_unlikely(&use_asid_allocator) ?
> +			atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
> +}
>  #else /* CONFIG_MMU */
>  #define local_flush_tlb_all()			do { } while (0)
>  #define local_flush_tlb_page(addr)		do { } while (0)
>  #endif /* CONFIG_MMU */
>  
> -#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
>  void flush_tlb_all(void);
>  void flush_tlb_mm(struct mm_struct *mm);
>  void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
> @@ -55,26 +92,4 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
>  void arch_flush_tlb_batched_pending(struct mm_struct *mm);
>  void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
>  
> -#else /* CONFIG_SMP && CONFIG_MMU */
> -
> -#define flush_tlb_all() local_flush_tlb_all()
> -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
> -
> -static inline void flush_tlb_range(struct vm_area_struct *vma,
> -		unsigned long start, unsigned long end)
> -{
> -	local_flush_tlb_all();
> -}
> -
> -/* Flush a range of kernel pages */
> -static inline void flush_tlb_kernel_range(unsigned long start,
> -	unsigned long end)
> -{
> -	local_flush_tlb_all();
> -}
> -
> -#define flush_tlb_mm(mm) flush_tlb_all()
> -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> -#endif /* !CONFIG_SMP || !CONFIG_MMU */
> -
>  #endif /* _ASM_RISCV_TLBFLUSH_H */
> diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
> index 2c869f8026a8..7c6c4c858a6b 100644
> --- a/arch/riscv/mm/Makefile
> +++ b/arch/riscv/mm/Makefile
> @@ -19,7 +19,7 @@ obj-y += context.o
>  obj-y += pmem.o
>  
>  ifeq ($(CONFIG_MMU),y)
> -obj-$(CONFIG_SMP) += tlbflush.o
> +obj-y += tlbflush.o
>  endif
>  obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
>  obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 8d12b26f5ac3..4765603fa08a 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -6,28 +6,36 @@
>  #include <linux/hugetlb.h>
>  #include <asm/sbi.h>
>  #include <asm/mmu_context.h>
> +#include <asm/tlbflush.h>
>  
> -static inline void local_flush_tlb_all_asid(unsigned long asid)
> +static unsigned long get_stride_size(struct vm_area_struct *vma)
>  {
> -	if (asid != FLUSH_TLB_NO_ASID)
> -		__asm__ __volatile__ ("sfence.vma x0, %0"
> -				:
> -				: "r" (asid)
> -				: "memory");
> -	else
> -		local_flush_tlb_all();
> -}
> +	unsigned long stride_size;
>  
> -static inline void local_flush_tlb_page_asid(unsigned long addr,
> -		unsigned long asid)
> -{
> -	if (asid != FLUSH_TLB_NO_ASID)
> -		__asm__ __volatile__ ("sfence.vma %0, %1"
> -				:
> -				: "r" (addr), "r" (asid)
> -				: "memory");
> -	else
> -		local_flush_tlb_page(addr);
> +	if (!is_vm_hugetlb_page(vma))
> +		return PAGE_SIZE;
> +
> +	stride_size = huge_page_size(hstate_vma(vma));
> +
> +	/*
> +	 * As stated in the privileged specification, every PTE in a
> +	 * NAPOT region must be invalidated, so reset the stride in that
> +	 * case.
> +	 */
> +	if (has_svnapot()) {
> +		if (stride_size >= PGDIR_SIZE)
> +			stride_size = PGDIR_SIZE;
> +		else if (stride_size >= P4D_SIZE)
> +			stride_size = P4D_SIZE;
> +		else if (stride_size >= PUD_SIZE)
> +			stride_size = PUD_SIZE;
> +		else if (stride_size >= PMD_SIZE)
> +			stride_size = PMD_SIZE;
> +		else
> +			stride_size = PAGE_SIZE;
> +	}
> +
> +	return stride_size;
>  }
>  
>  /*
> @@ -66,31 +74,12 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
>  		local_flush_tlb_range_threshold_asid(start, size, stride, asid);
>  }
>  
> -void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
> -{
> -	local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID);
> -}
> -
> +#ifdef CONFIG_SMP
>  static void __ipi_flush_tlb_all(void *info)
>  {
>  	local_flush_tlb_all();
>  }
>  
> -void flush_tlb_all(void)
> -{
> -	if (riscv_use_ipi_for_rfence())
> -		on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
> -	else
> -		sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
> -}
> -
> -struct flush_tlb_range_data {
> -	unsigned long asid;
> -	unsigned long start;
> -	unsigned long size;
> -	unsigned long stride;
> -};
> -
>  static void __ipi_flush_tlb_range_asid(void *info)
>  {
>  	struct flush_tlb_range_data *d = info;
> @@ -138,10 +127,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
>  		put_cpu();
>  }
>  
> -static inline unsigned long get_mm_asid(struct mm_struct *mm)
> +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
>  {
> -	return static_branch_unlikely(&use_asid_allocator) ?
> -			atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
> +	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
> +			  addr, PAGE_SIZE, PAGE_SIZE);
> +}
> +
> +void flush_tlb_all(void)
> +{
> +	if (riscv_use_ipi_for_rfence())
> +		on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
> +	else
> +		sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
>  }
>  
>  void flush_tlb_mm(struct mm_struct *mm)
> @@ -158,41 +155,12 @@ void flush_tlb_mm_range(struct mm_struct *mm,
>  			  start, end - start, page_size);
>  }
>  
> -void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
> -{
> -	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
> -			  addr, PAGE_SIZE, PAGE_SIZE);
> -}
> -
>  void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>  		     unsigned long end)
>  {
>  	unsigned long stride_size;
>  
> -	if (!is_vm_hugetlb_page(vma)) {
> -		stride_size = PAGE_SIZE;
> -	} else {
> -		stride_size = huge_page_size(hstate_vma(vma));
> -
> -		/*
> -		 * As stated in the privileged specification, every PTE in a
> -		 * NAPOT region must be invalidated, so reset the stride in that
> -		 * case.
> -		 */
> -		if (has_svnapot()) {
> -			if (stride_size >= PGDIR_SIZE)
> -				stride_size = PGDIR_SIZE;
> -			else if (stride_size >= P4D_SIZE)
> -				stride_size = P4D_SIZE;
> -			else if (stride_size >= PUD_SIZE)
> -				stride_size = PUD_SIZE;
> -			else if (stride_size >= PMD_SIZE)
> -				stride_size = PMD_SIZE;
> -			else
> -				stride_size = PAGE_SIZE;
> -		}
> -	}
> -
> +	stride_size = get_stride_size(vma);
>  	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
>  			  start, end - start, stride_size);
>  }
> @@ -203,6 +171,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>  			  start, end - start, PAGE_SIZE);
>  }
>  
> +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
> +{
> +	__flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
> +			  FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
> +}
> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>  			unsigned long end)
> @@ -212,6 +186,77 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>  }
>  #endif
>  
> +#else
> +static void __flush_tlb_range_up(struct mm_struct *mm, unsigned long start,
> +				 unsigned long size, unsigned long stride)
> +{
> +	unsigned long asid = FLUSH_TLB_NO_ASID;
> +
> +	if (mm)
> +		asid = get_mm_asid(mm);
> +
> +	local_flush_tlb_range_asid(start, size, stride, asid);
> +}
> +
> +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
> +{
> +	local_flush_tlb_page(addr);
> +}
> +
> +void flush_tlb_all(void)
> +{
> +	local_flush_tlb_all();
> +}
> +
> +void flush_tlb_mm(struct mm_struct *mm)
> +{
> +	__flush_tlb_range_up(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
> +}
> +
> +void flush_tlb_mm_range(struct mm_struct *mm,
> +			unsigned long start, unsigned long end,
> +			unsigned int page_size)
> +{
> +	__flush_tlb_range_up(mm, start, end - start, page_size);
> +}
> +
> +void flush_tlb_range(struct vm_area_struct *vma,
> +		unsigned long start, unsigned long end)
> +{
> +	unsigned long stride_size;
> +
> +	stride_size = get_stride_size(vma);
> +	__flush_tlb_range_up(vma->vm_mm, start, end - start, stride_size);
> +}
> +
> +/* Flush a range of kernel pages */
> +void flush_tlb_kernel_range(unsigned long start,
> +	unsigned long end)
> +{
> +	__flush_tlb_range_up(NULL, start, end - start, PAGE_SIZE);
> +}
> +
> +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
> +{
> +	__flush_tlb_range_up(NULL, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
> +}
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> +			unsigned long end)
> +{
> +	__flush_tlb_range_up(vma->vm_mm, start, end - start, PMD_SIZE);
> +}
> +#endif
> +
> +#endif
> +
> +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
> +{
> +	local_flush_tlb_range_asid(start, end - start, PAGE_SIZE,
> +				   FLUSH_TLB_NO_ASID);
> +}
> +
>  bool arch_tlbbatch_should_defer(struct mm_struct *mm)
>  {
>  	return true;
> @@ -228,9 +273,3 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm)
>  {
>  	flush_tlb_mm(mm);
>  }
> -
> -void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
> -{
> -	__flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
> -			  FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
> -}
> -- 
> 2.20.1
>