Hi,
This is v4 of the series removing on-stack cpumask variables for the
sparc arch.
Changes since v3:
- Rebased against Andreas' for-next branch.
- Added Reviewed-by from Sam.
- Dropped the patch "sparc/init: Remove on-stack cpumask var".
v1:
https://lore.kernel.org/all/[email protected]/
v2:
https://lore.kernel.org/all/[email protected]/
v3:
https://lore.kernel.org/all/[email protected]/
Dawei Li (5):
sparc/srmmu: Remove on-stack cpumask var
sparc/irq: Remove on-stack cpumask var
sparc/of: Remove on-stack cpumask var
sparc/pci_msi: Remove on-stack cpumask var
sparc/leon: Remove on-stack cpumask var
arch/sparc/kernel/irq_64.c | 10 +++-----
arch/sparc/kernel/leon_kernel.c | 7 +++---
arch/sparc/kernel/of_device_64.c | 5 +---
arch/sparc/kernel/pci_msi.c | 5 +---
arch/sparc/mm/srmmu.c | 40 ++++++++++----------------------
5 files changed, 20 insertions(+), 47 deletions(-)
--
2.27.0
In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.
The @cpumask argument of irq_set_affinity() is read-only and never
modified, so drop the unneeded temporary cpumask variable.
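For reference (context only, not something this patch changes): the
prototype, roughly as declared in include/linux/interrupt.h, already takes
a const pointer, so cpumask_of_node(nid) can be handed over directly:

	/* the core only reads the mask; callers need no writable copy */
	extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);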
Reviewed-by: Sam Ravnborg <[email protected]>
Signed-off-by: Dawei Li <[email protected]>
---
arch/sparc/kernel/pci_msi.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index fc7402948b7b..acb2f83a1d5c 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -287,10 +287,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
nid = pbm->numa_node;
if (nid != -1) {
- cpumask_t numa_mask;
-
- cpumask_copy(&numa_mask, cpumask_of_node(nid));
- irq_set_affinity(irq, &numa_mask);
+ irq_set_affinity(irq, cpumask_of_node(nid));
}
err = request_irq(irq, sparc64_msiq_interrupt, 0,
"MSIQ",
--
2.27.0
In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.
Use cpumask_any_but() to avoid the need for a temporary cpumask on
the stack and simplify code.
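As an aside (an illustrative sketch, not extra patch content):
cpumask_any_but() returns a CPU number >= nr_cpu_ids when no bit other
than the excluded one is set, so the helper introduced below is
equivalent to the open-coded check it replaces:

	/* before: NR_CPUS-bit temporary on the stack */
	cpumask_t cpu_mask;

	cpumask_copy(&cpu_mask, mm_cpumask(mm));
	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
	if (!cpumask_empty(&cpu_mask))
		/* some other CPU still uses this mm */;

	/* after: same condition, no temporary */
	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
		/* some other CPU still uses this mm */;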
Reviewed-by: Sam Ravnborg <[email protected]>
Signed-off-by: Dawei Li <[email protected]>
---
arch/sparc/mm/srmmu.c | 40 ++++++++++++----------------------------
1 file changed, 12 insertions(+), 28 deletions(-)
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index 852085ada368..9df51a62333d 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -1653,13 +1653,15 @@ static void smp_flush_tlb_all(void)
local_ops->tlb_all();
}
+static bool any_other_mm_cpus(struct mm_struct *mm)
+{
+ return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids;
+}
+
static void smp_flush_cache_mm(struct mm_struct *mm)
{
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask;
- cpumask_copy(&cpu_mask, mm_cpumask(mm));
- cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
- if (!cpumask_empty(&cpu_mask))
+ if (any_other_mm_cpus(mm))
xc1(local_ops->cache_mm, (unsigned long)mm);
local_ops->cache_mm(mm);
}
@@ -1668,10 +1670,7 @@ static void smp_flush_cache_mm(struct mm_struct *mm)
static void smp_flush_tlb_mm(struct mm_struct *mm)
{
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask;
- cpumask_copy(&cpu_mask, mm_cpumask(mm));
- cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
- if (!cpumask_empty(&cpu_mask)) {
+ if (any_other_mm_cpus(mm)) {
xc1(local_ops->tlb_mm, (unsigned long)mm);
if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
cpumask_copy(mm_cpumask(mm),
@@ -1688,10 +1687,7 @@ static void smp_flush_cache_range(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask;
- cpumask_copy(&cpu_mask, mm_cpumask(mm));
- cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
- if (!cpumask_empty(&cpu_mask))
+ if (any_other_mm_cpus(mm))
xc3(local_ops->cache_range, (unsigned long)vma, start,
end);
local_ops->cache_range(vma, start, end);
@@ -1705,10 +1701,7 @@ static void smp_flush_tlb_range(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask;
- cpumask_copy(&cpu_mask, mm_cpumask(mm));
- cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
- if (!cpumask_empty(&cpu_mask))
+ if (any_other_mm_cpus(mm))
xc3(local_ops->tlb_range, (unsigned long)vma, start,
end);
local_ops->tlb_range(vma, start, end);
@@ -1720,10 +1713,7 @@ static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
struct mm_struct *mm = vma->vm_mm;
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask;
- cpumask_copy(&cpu_mask, mm_cpumask(mm));
- cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
- if (!cpumask_empty(&cpu_mask))
+ if (any_other_mm_cpus(mm))
xc2(local_ops->cache_page, (unsigned long)vma, page);
local_ops->cache_page(vma, page);
}
@@ -1734,10 +1724,7 @@ static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
struct mm_struct *mm = vma->vm_mm;
if (mm->context != NO_CONTEXT) {
- cpumask_t cpu_mask;
- cpumask_copy(&cpu_mask, mm_cpumask(mm));
- cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
- if (!cpumask_empty(&cpu_mask))
+ if (any_other_mm_cpus(mm))
xc2(local_ops->tlb_page, (unsigned long)vma, page);
local_ops->tlb_page(vma, page);
}
@@ -1759,10 +1746,7 @@ static void smp_flush_page_to_ram(unsigned long page)
static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
{
- cpumask_t cpu_mask;
- cpumask_copy(&cpu_mask, mm_cpumask(mm));
- cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
- if (!cpumask_empty(&cpu_mask))
+ if (any_other_mm_cpus(mm))
xc2(local_ops->sig_insns, (unsigned long)mm, insn_addr);
local_ops->sig_insns(mm, insn_addr);
}
--
2.27.0
In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.
The @cpumask argument of irq_set_affinity() is read-only and never
modified, so drop the unneeded temporary cpumask variable.
Reviewed-by: Sam Ravnborg <[email protected]>
Signed-off-by: Dawei Li <[email protected]>
---
arch/sparc/kernel/of_device_64.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index c350c58c7f69..f98c2901f335 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -624,10 +624,7 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
out:
nid = of_node_to_nid(dp);
if (nid != -1) {
- cpumask_t numa_mask;
-
- cpumask_copy(&numa_mask, cpumask_of_node(nid));
- irq_set_affinity(irq, &numa_mask);
+ irq_set_affinity(irq, cpumask_of_node(nid));
}
return irq;
--
2.27.0
In general it's preferable to avoid placing cpumasks on the stack, as
for large values of NR_CPUS these can consume significant amounts of
stack space and make stack overflows more likely.
Use cpumask_subset() and cpumask_first_and() to avoid the need for a
temporary cpumask on the stack.
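A note on the equivalence (illustration only, not extra patch content):
with mask = cpu_online_mask & affinity, cpumask_equal(&mask,
cpu_online_mask) holds exactly when cpu_online_mask is a subset of
affinity, and cpumask_empty(&mask) holds exactly when
cpumask_first_and(affinity, cpu_online_mask) returns a value
>= nr_cpu_ids, so the rewrite keeps the original behaviour:

	unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask);

	/* online & affinity == online  <=>  online is a subset of affinity */
	if (cpumask_subset(cpu_online_mask, affinity) || cpu >= nr_cpu_ids)
		return boot_cpu_id;
	return cpu;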
Reviewed-by: Sam Ravnborg <[email protected]>
Signed-off-by: Dawei Li <[email protected]>
---
arch/sparc/kernel/leon_kernel.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
index 4c61da491fee..a43cf794bb1e 100644
--- a/arch/sparc/kernel/leon_kernel.c
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -106,13 +106,12 @@ unsigned long leon_get_irqmask(unsigned int irq)
#ifdef CONFIG_SMP
static int irq_choose_cpu(const struct cpumask *affinity)
{
- cpumask_t mask;
+ unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask);
- cpumask_and(&mask, cpu_online_mask, affinity);
- if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask))
+ if (cpumask_subset(cpu_online_mask, affinity) || cpu >= nr_cpu_ids)
return boot_cpu_id;
else
- return cpumask_first(&mask);
+ return cpu;
}
#else
#define irq_choose_cpu(affinity) boot_cpu_id
--
2.27.0
Hi Andreas,
On Wed, Apr 24, 2024 at 10:55:43AM +0800, Dawei Li wrote:
> Hi,
>
> This is v4 of the series removing on-stack cpumask variables for the
> sparc arch.
>
> Changes since v3:
>
> - Rebased against Andreas' for-next branch.
>
> - Added Reviewed-by from Sam.
>
> - Dropped the patch "sparc/init: Remove on-stack cpumask var".
>
> v1:
> https://lore.kernel.org/all/[email protected]/
>
> v2:
> https://lore.kernel.org/all/[email protected]/
>
> v3:
> https://lore.kernel.org/all/[email protected]/
>
> Dawei Li (5):
> sparc/srmmu: Remove on-stack cpumask var
> sparc/irq: Remove on-stack cpumask var
> sparc/of: Remove on-stack cpumask var
> sparc/pci_msi: Remove on-stack cpumask var
> sparc/leon: Remove on-stack cpumask var
>
> arch/sparc/kernel/irq_64.c | 10 +++-----
> arch/sparc/kernel/leon_kernel.c | 7 +++---
> arch/sparc/kernel/of_device_64.c | 5 +---
> arch/sparc/kernel/pci_msi.c | 5 +---
> arch/sparc/mm/srmmu.c | 40 ++++++++++----------------------
> 5 files changed, 20 insertions(+), 47 deletions(-)
Any chance of getting this series queued up for v6.10?
Thanks,
Dawei
>
> --
> 2.27.0
>
On 2024-04-24 04:55, Dawei Li wrote:
> In general it's preferable to avoid placing cpumasks on the stack, as
> for large values of NR_CPUS these can consume significant amounts of
> stack space and make stack overflows more likely.
>
> Use cpumask_subset() and cpumask_first_and() to avoid the need for a
> temporary cpumask on the stack.
>
> Reviewed-by: Sam Ravnborg <[email protected]>
> Signed-off-by: Dawei Li <[email protected]>
> ---
> arch/sparc/kernel/leon_kernel.c | 7 +++----
> 1 file changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
> index 4c61da491fee..a43cf794bb1e 100644
> --- a/arch/sparc/kernel/leon_kernel.c
> +++ b/arch/sparc/kernel/leon_kernel.c
> @@ -106,13 +106,12 @@ unsigned long leon_get_irqmask(unsigned int irq)
> #ifdef CONFIG_SMP
> static int irq_choose_cpu(const struct cpumask *affinity)
> {
> - cpumask_t mask;
> + unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask);
>
> - cpumask_and(&mask, cpu_online_mask, affinity);
> - if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask))
> + if (cpumask_subset(cpu_online_mask, affinity) || cpu >= nr_cpu_ids)
> return boot_cpu_id;
> else
> - return cpumask_first(&mask);
> + return cpu;
> }
> #else
> #define irq_choose_cpu(affinity) boot_cpu_id
Reviewed-by: Andreas Larsson <[email protected]>
Tested-by: Andreas Larsson <[email protected]>
Picking this up to my for-next.
Thanks,
Andreas
On 2024-05-07 14:56, Dawei Li wrote:
> Hi Andreas,
>
> On Wed, Apr 24, 2024 at 10:55:43AM +0800, Dawei Li wrote:
>> Hi,
>>
>> This is v4 of the series removing on-stack cpumask variables for the
>> sparc arch.
>>
>> Changes since v3:
>>
>> - Rebased against Andreas' for-next branch.
>>
>> - Added Reviewed-by from Sam.
>>
>> - Dropped the patch "sparc/init: Remove on-stack cpumask var".
>>
>> v1:
>> https://lore.kernel.org/all/[email protected]/
>>
>> v2:
>> https://lore.kernel.org/all/[email protected]/
>>
>> v3:
>> https://lore.kernel.org/all/[email protected]/
>>
>> Dawei Li (5):
>> sparc/srmmu: Remove on-stack cpumask var
>> sparc/irq: Remove on-stack cpumask var
>> sparc/of: Remove on-stack cpumask var
>> sparc/pci_msi: Remove on-stack cpumask var
>> sparc/leon: Remove on-stack cpumask var
>>
>> arch/sparc/kernel/irq_64.c | 10 +++-----
>> arch/sparc/kernel/leon_kernel.c | 7 +++---
>> arch/sparc/kernel/of_device_64.c | 5 +---
>> arch/sparc/kernel/pci_msi.c | 5 +---
>> arch/sparc/mm/srmmu.c | 40 ++++++++++----------------------
>> 5 files changed, 20 insertions(+), 47 deletions(-)
>
> Any chance of getting this series queued up for v6.10?
Yes. Picking the series up to my for-next.
Reviewed-by: Andreas Larsson <[email protected]>
Thanks,
Andreas
On 2024-04-24 04:55, Dawei Li wrote:
> In general it's preferable to avoid placing cpumasks on the stack, as
> for large values of NR_CPUS these can consume significant amounts of
> stack space and make stack overflows more likely.
>
> Use cpumask_any_but() to avoid the need for a temporary cpumask on
> the stack and simplify code.
>
> Reviewed-by: Sam Ravnborg <[email protected]>
> Signed-off-by: Dawei Li <[email protected]>
> ---
> arch/sparc/mm/srmmu.c | 40 ++++++++++++----------------------------
> 1 file changed, 12 insertions(+), 28 deletions(-)
>
> diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
> index 852085ada368..9df51a62333d 100644
> --- a/arch/sparc/mm/srmmu.c
> +++ b/arch/sparc/mm/srmmu.c
> @@ -1653,13 +1653,15 @@ static void smp_flush_tlb_all(void)
> local_ops->tlb_all();
> }
>
> +static bool any_other_mm_cpus(struct mm_struct *mm)
> +{
> + return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids;
> +}
> +
> static void smp_flush_cache_mm(struct mm_struct *mm)
> {
> if (mm->context != NO_CONTEXT) {
> - cpumask_t cpu_mask;
> - cpumask_copy(&cpu_mask, mm_cpumask(mm));
> - cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
> - if (!cpumask_empty(&cpu_mask))
> + if (any_other_mm_cpus(mm))
> xc1(local_ops->cache_mm, (unsigned long)mm);
> local_ops->cache_mm(mm);
> }
> @@ -1668,10 +1670,7 @@ static void smp_flush_cache_mm(struct mm_struct *mm)
> static void smp_flush_tlb_mm(struct mm_struct *mm)
> {
> if (mm->context != NO_CONTEXT) {
> - cpumask_t cpu_mask;
> - cpumask_copy(&cpu_mask, mm_cpumask(mm));
> - cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
> - if (!cpumask_empty(&cpu_mask)) {
> + if (any_other_mm_cpus(mm)) {
> xc1(local_ops->tlb_mm, (unsigned long)mm);
> if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
> cpumask_copy(mm_cpumask(mm),
> @@ -1688,10 +1687,7 @@ static void smp_flush_cache_range(struct vm_area_struct *vma,
> struct mm_struct *mm = vma->vm_mm;
>
> if (mm->context != NO_CONTEXT) {
> - cpumask_t cpu_mask;
> - cpumask_copy(&cpu_mask, mm_cpumask(mm));
> - cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
> - if (!cpumask_empty(&cpu_mask))
> + if (any_other_mm_cpus(mm))
> xc3(local_ops->cache_range, (unsigned long)vma, start,
> end);
> local_ops->cache_range(vma, start, end);
> @@ -1705,10 +1701,7 @@ static void smp_flush_tlb_range(struct vm_area_struct *vma,
> struct mm_struct *mm = vma->vm_mm;
>
> if (mm->context != NO_CONTEXT) {
> - cpumask_t cpu_mask;
> - cpumask_copy(&cpu_mask, mm_cpumask(mm));
> - cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
> - if (!cpumask_empty(&cpu_mask))
> + if (any_other_mm_cpus(mm))
> xc3(local_ops->tlb_range, (unsigned long)vma, start,
> end);
> local_ops->tlb_range(vma, start, end);
> @@ -1720,10 +1713,7 @@ static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
> struct mm_struct *mm = vma->vm_mm;
>
> if (mm->context != NO_CONTEXT) {
> - cpumask_t cpu_mask;
> - cpumask_copy(&cpu_mask, mm_cpumask(mm));
> - cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
> - if (!cpumask_empty(&cpu_mask))
> + if (any_other_mm_cpus(mm))
> xc2(local_ops->cache_page, (unsigned long)vma, page);
> local_ops->cache_page(vma, page);
> }
> @@ -1734,10 +1724,7 @@ static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
> struct mm_struct *mm = vma->vm_mm;
>
> if (mm->context != NO_CONTEXT) {
> - cpumask_t cpu_mask;
> - cpumask_copy(&cpu_mask, mm_cpumask(mm));
> - cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
> - if (!cpumask_empty(&cpu_mask))
> + if (any_other_mm_cpus(mm))
> xc2(local_ops->tlb_page, (unsigned long)vma, page);
> local_ops->tlb_page(vma, page);
> }
> @@ -1759,10 +1746,7 @@ static void smp_flush_page_to_ram(unsigned long page)
>
> static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
> {
> - cpumask_t cpu_mask;
> - cpumask_copy(&cpu_mask, mm_cpumask(mm));
> - cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
> - if (!cpumask_empty(&cpu_mask))
> + if (any_other_mm_cpus(mm))
> xc2(local_ops->sig_insns, (unsigned long)mm, insn_addr);
> local_ops->sig_insns(mm, insn_addr);
> }
Reviewed-by: Andreas Larsson <[email protected]>
Tested-by: Andreas Larsson <[email protected]>
Picking this up to my for-next.
Thanks,
Andreas