2024-05-08 12:04:37

by Puranjay Mohan

[permalink] [raw]
Subject: [PATCH bpf v2] powerpc/bpf: enforce full ordering for ATOMIC operations with BPF_FETCH

The Linux Kernel Memory Model [1][2] requires RMW operations that have a
return value to be fully ordered.

BPF atomic operations with BPF_FETCH (including BPF_XCHG and
BPF_CMPXCHG) return a value back so they need to be JITed to fully
ordered operations. POWERPC currently emits relaxed operations for
these.

We can show this by running the following litmus-test:

PPC SB+atomic_add+fetch

{
0:r0=x; (* dst reg assuming offset is 0 *)
0:r1=2; (* src reg *)
0:r2=1;
0:r4=y; (* P0 writes to this, P1 reads this *)
0:r5=z; (* P1 writes to this, P0 reads this *)
0:r6=0;

1:r2=1;
1:r4=y;
1:r5=z;
}

P0 | P1 ;
stw r2, 0(r4) | stw r2,0(r5) ;
| ;
loop:lwarx r3, r6, r0 | ;
mr r8, r3 | ;
add r3, r3, r1 | sync ;
stwcx. r3, r6, r0 | ;
bne loop | ;
mr r1, r8 | ;
| ;
lwa r7, 0(r5) | lwa r7,0(r4) ;

~exists(0:r7=0 /\ 1:r7=0)

Witnesses
Positive: 9 Negative: 3
Condition ~exists (0:r7=0 /\ 1:r7=0)
Observation SB+atomic_add+fetch Sometimes 3 9

This test shows that the older store in P0 is reordered with a newer
load to a different address, even though there is an RMW operation with
fetch between them. Adding a sync before and after the RMW fixes the issue:

Witnesses
Positive: 9 Negative: 0
Condition ~exists (0:r7=0 /\ 1:r7=0)
Observation SB+atomic_add+fetch Never 0 9

[1] https://www.kernel.org/doc/Documentation/memory-barriers.txt
[2] https://www.kernel.org/doc/Documentation/atomic_t.txt

Fixes: 65112709115f ("powerpc/bpf/64: add support for BPF_ATOMIC bitwise operations")
Signed-off-by: Puranjay Mohan <[email protected]>
---
Changes in v1 -> v2:
v1: https://lore.kernel.org/all/[email protected]/
- Don't emit `sync` for non-SMP kernels as that adds unnecessary overhead.
---
arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++
arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++
2 files changed, 24 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 2f39c50ca729..0318b83f2e6a 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -853,6 +853,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
/* Get offset into TMP_REG */
EMIT(PPC_RAW_LI(tmp_reg, off));
tmp_idx = ctx->idx * 4;
+ /*
+ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+ * before and after the operation.
+ *
+ * This is a requirement in the Linux Kernel Memory Model.
+ * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
+ */
+ if (imm & BPF_FETCH && IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
/* load value from memory into r0 */
EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));

@@ -905,6 +914,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code

/* For the BPF_FETCH variant, get old data into src_reg */
if (imm & BPF_FETCH) {
+ /* Emit 'sync' to enforce full ordering */
+ if (IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
EMIT(PPC_RAW_MR(ret_reg, ax_reg));
if (!fp->aux->verifier_zext)
EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 79f23974a320..9a077f8acf7b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -804,6 +804,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
/* Get offset into TMP_REG_1 */
EMIT(PPC_RAW_LI(tmp1_reg, off));
tmp_idx = ctx->idx * 4;
+ /*
+ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+ * before and after the operation.
+ *
+ * This is a requirement in the Linux Kernel Memory Model.
+ * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
+ */
+ if (imm & BPF_FETCH && IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
/* load value from memory into TMP_REG_2 */
if (size == BPF_DW)
EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0));
@@ -865,6 +874,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
PPC_BCC_SHORT(COND_NE, tmp_idx);

if (imm & BPF_FETCH) {
+ /* Emit 'sync' to enforce full ordering */
+ if (IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
EMIT(PPC_RAW_MR(ret_reg, _R0));
/*
* Skip unnecessary zero-extension for 32-bit cmpxchg.
--
2.42.0



2024-05-08 14:25:11

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH bpf v2] powerpc/bpf: enforce full ordering for ATOMIC operations with BPF_FETCH

On Wed, May 08, 2024 at 11:54:04AM +0000, Puranjay Mohan wrote:
> The Linux Kernel Memory Model [1][2] requires RMW operations that have a
> return value to be fully ordered.
>
> BPF atomic operations with BPF_FETCH (including BPF_XCHG and
> BPF_CMPXCHG) return a value back so they need to be JITed to fully
> ordered operations. POWERPC currently emits relaxed operations for
> these.
>
> We can show this by running the following litmus-test:
>
> PPC SB+atomic_add+fetch
>
> {
> 0:r0=x; (* dst reg assuming offset is 0 *)
> 0:r1=2; (* src reg *)
> 0:r2=1;
> 0:r4=y; (* P0 writes to this, P1 reads this *)
> 0:r5=z; (* P1 writes to this, P0 reads this *)
> 0:r6=0;
>
> 1:r2=1;
> 1:r4=y;
> 1:r5=z;
> }
>
> P0 | P1 ;
> stw r2, 0(r4) | stw r2,0(r5) ;
> | ;
> loop:lwarx r3, r6, r0 | ;
> mr r8, r3 | ;
> add r3, r3, r1 | sync ;
> stwcx. r3, r6, r0 | ;
> bne loop | ;
> mr r1, r8 | ;
> | ;
> lwa r7, 0(r5) | lwa r7,0(r4) ;
>
> ~exists(0:r7=0 /\ 1:r7=0)
>
> Witnesses
> Positive: 9 Negative: 3
> Condition ~exists (0:r7=0 /\ 1:r7=0)
> Observation SB+atomic_add+fetch Sometimes 3 9
>
> This test shows that the older store in P0 is reordered with a newer
> load to a different address. Although there is a RMW operation with
> fetch between them. Adding a sync before and after RMW fixes the issue:
>
> Witnesses
> Positive: 9 Negative: 0
> Condition ~exists (0:r7=0 /\ 1:r7=0)
> Observation SB+atomic_add+fetch Never 0 9
>
> [1] https://www.kernel.org/doc/Documentation/memory-barriers.txt
> [2] https://www.kernel.org/doc/Documentation/atomic_t.txt
>
> Fixes: 65112709115f ("powerpc/bpf/64: add support for BPF_ATOMIC bitwise operations")
> Signed-off-by: Puranjay Mohan <[email protected]>

Acked-by: Paul E. McKenney <[email protected]>

> ---
> Changes in v1 -> v2:
> v1: https://lore.kernel.org/all/[email protected]/
> - Don't emit `sync` for non-SMP kernels as that adds unessential overhead.
> ---
> arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++
> arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++
> 2 files changed, 24 insertions(+)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 2f39c50ca729..0318b83f2e6a 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -853,6 +853,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> /* Get offset into TMP_REG */
> EMIT(PPC_RAW_LI(tmp_reg, off));
> tmp_idx = ctx->idx * 4;
> + /*
> + * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
> + * before and after the operation.
> + *
> + * This is a requirement in the Linux Kernel Memory Model.
> + * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
> + */
> + if (imm & BPF_FETCH && IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());
> /* load value from memory into r0 */
> EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
>
> @@ -905,6 +914,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
>
> /* For the BPF_FETCH variant, get old data into src_reg */
> if (imm & BPF_FETCH) {
> + /* Emit 'sync' to enforce full ordering */
> + if (IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());
> EMIT(PPC_RAW_MR(ret_reg, ax_reg));
> if (!fp->aux->verifier_zext)
> EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 79f23974a320..9a077f8acf7b 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -804,6 +804,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> /* Get offset into TMP_REG_1 */
> EMIT(PPC_RAW_LI(tmp1_reg, off));
> tmp_idx = ctx->idx * 4;
> + /*
> + * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
> + * before and after the operation.
> + *
> + * This is a requirement in the Linux Kernel Memory Model.
> + * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
> + */
> + if (imm & BPF_FETCH && IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());
> /* load value from memory into TMP_REG_2 */
> if (size == BPF_DW)
> EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0));
> @@ -865,6 +874,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> PPC_BCC_SHORT(COND_NE, tmp_idx);
>
> if (imm & BPF_FETCH) {
> + /* Emit 'sync' to enforce full ordering */
> + if (IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());
> EMIT(PPC_RAW_MR(ret_reg, _R0));
> /*
> * Skip unnecessary zero-extension for 32-bit cmpxchg.
> --
> 2.42.0
>

2024-05-08 17:09:06

by Naveen N Rao

[permalink] [raw]
Subject: Re: [PATCH bpf v2] powerpc/bpf: enforce full ordering for ATOMIC operations with BPF_FETCH

On Wed, May 08, 2024 at 11:54:04AM GMT, Puranjay Mohan wrote:
> The Linux Kernel Memory Model [1][2] requires RMW operations that have a
> return value to be fully ordered.
>
> BPF atomic operations with BPF_FETCH (including BPF_XCHG and
> BPF_CMPXCHG) return a value back so they need to be JITed to fully
> ordered operations. POWERPC currently emits relaxed operations for
> these.
>
> We can show this by running the following litmus-test:
>
> PPC SB+atomic_add+fetch
>
> {
> 0:r0=x; (* dst reg assuming offset is 0 *)
> 0:r1=2; (* src reg *)
> 0:r2=1;
> 0:r4=y; (* P0 writes to this, P1 reads this *)
> 0:r5=z; (* P1 writes to this, P0 reads this *)
> 0:r6=0;
>
> 1:r2=1;
> 1:r4=y;
> 1:r5=z;
> }
>
> P0 | P1 ;
> stw r2, 0(r4) | stw r2,0(r5) ;
> | ;
> loop:lwarx r3, r6, r0 | ;
> mr r8, r3 | ;
> add r3, r3, r1 | sync ;
> stwcx. r3, r6, r0 | ;
> bne loop | ;
> mr r1, r8 | ;
> | ;
> lwa r7, 0(r5) | lwa r7,0(r4) ;
>
> ~exists(0:r7=0 /\ 1:r7=0)
>
> Witnesses
> Positive: 9 Negative: 3
> Condition ~exists (0:r7=0 /\ 1:r7=0)
> Observation SB+atomic_add+fetch Sometimes 3 9
>
> This test shows that the older store in P0 is reordered with a newer
> load to a different address. Although there is a RMW operation with
> fetch between them. Adding a sync before and after RMW fixes the issue:
>
> Witnesses
> Positive: 9 Negative: 0
> Condition ~exists (0:r7=0 /\ 1:r7=0)
> Observation SB+atomic_add+fetch Never 0 9
>
> [1] https://www.kernel.org/doc/Documentation/memory-barriers.txt
> [2] https://www.kernel.org/doc/Documentation/atomic_t.txt
>
> Fixes: 65112709115f ("powerpc/bpf/64: add support for BPF_ATOMIC bitwise operations")
> Signed-off-by: Puranjay Mohan <[email protected]>

Thanks for reporting and fixing this.

There are actually four commits that this fixes across ppc32/ppc64:
Fixes: aea7ef8a82c0 ("powerpc/bpf/32: add support for BPF_ATOMIC bitwise operations")
Fixes: 2d9206b22743 ("powerpc/bpf/32: Add instructions for atomic_[cmp]xchg")
Fixes: dbe6e2456fb0 ("powerpc/bpf/64: add support for atomic fetch operations")
Fixes: 1e82dfaa7819 ("powerpc/bpf/64: Add instructions for atomic_[cmp]xchg")

> ---
> Changes in v1 -> v2:
> v1: https://lore.kernel.org/all/[email protected]/
> - Don't emit `sync` for non-SMP kernels as that adds unessential overhead.
> ---
> arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++
> arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++
> 2 files changed, 24 insertions(+)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 2f39c50ca729..0318b83f2e6a 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -853,6 +853,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> /* Get offset into TMP_REG */
> EMIT(PPC_RAW_LI(tmp_reg, off));
> tmp_idx = ctx->idx * 4;
> + /*
> + * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
> + * before and after the operation.
> + *
> + * This is a requirement in the Linux Kernel Memory Model.
> + * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
Nit: s/__cmpxchg_u64/__cmpxchg_u32/ in the comment above, since this is
the 32-bit JIT.

> + */
> + if (imm & BPF_FETCH && IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());

I think this block should go before the previous two instructions. We
use tmp_idx as a label to retry the ll/sc sequence, so we will end up
executing the 'sync' operation on a retry here.

> /* load value from memory into r0 */
> EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
>
> @@ -905,6 +914,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
>
> /* For the BPF_FETCH variant, get old data into src_reg */
> if (imm & BPF_FETCH) {
> + /* Emit 'sync' to enforce full ordering */
> + if (IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());
> EMIT(PPC_RAW_MR(ret_reg, ax_reg));
> if (!fp->aux->verifier_zext)
> EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 79f23974a320..9a077f8acf7b 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -804,6 +804,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> /* Get offset into TMP_REG_1 */
> EMIT(PPC_RAW_LI(tmp1_reg, off));
> tmp_idx = ctx->idx * 4;
> + /*
> + * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
> + * before and after the operation.
> + *
> + * This is a requirement in the Linux Kernel Memory Model.
> + * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
> + */
> + if (imm & BPF_FETCH && IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());

Same here.

I'll try and give this a test tomorrow.


- Naveen


2024-05-13 06:18:22

by Christophe Leroy

[permalink] [raw]
Subject: Re: [PATCH bpf v2] powerpc/bpf: enforce full ordering for ATOMIC operations with BPF_FETCH



Le 08/05/2024 à 13:54, Puranjay Mohan a écrit :
> [Vous ne recevez pas souvent de courriers de [email protected]. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
>
> The Linux Kernel Memory Model [1][2] requires RMW operations that have a
> return value to be fully ordered.
>
> BPF atomic operations with BPF_FETCH (including BPF_XCHG and
> BPF_CMPXCHG) return a value back so they need to be JITed to fully
> ordered operations. POWERPC currently emits relaxed operations for
> these.
>
> We can show this by running the following litmus-test:
>
> PPC SB+atomic_add+fetch
>
> {
> 0:r0=x; (* dst reg assuming offset is 0 *)
> 0:r1=2; (* src reg *)
> 0:r2=1;
> 0:r4=y; (* P0 writes to this, P1 reads this *)
> 0:r5=z; (* P1 writes to this, P0 reads this *)
> 0:r6=0;
>
> 1:r2=1;
> 1:r4=y;
> 1:r5=z;
> }
>
> P0 | P1 ;
> stw r2, 0(r4) | stw r2,0(r5) ;
> | ;
> loop:lwarx r3, r6, r0 | ;
> mr r8, r3 | ;
> add r3, r3, r1 | sync ;
> stwcx. r3, r6, r0 | ;
> bne loop | ;
> mr r1, r8 | ;
> | ;
> lwa r7, 0(r5) | lwa r7,0(r4) ;
>
> ~exists(0:r7=0 /\ 1:r7=0)
>
> Witnesses
> Positive: 9 Negative: 3
> Condition ~exists (0:r7=0 /\ 1:r7=0)
> Observation SB+atomic_add+fetch Sometimes 3 9
>
> This test shows that the older store in P0 is reordered with a newer
> load to a different address. Although there is a RMW operation with
> fetch between them. Adding a sync before and after RMW fixes the issue:
>
> Witnesses
> Positive: 9 Negative: 0
> Condition ~exists (0:r7=0 /\ 1:r7=0)
> Observation SB+atomic_add+fetch Never 0 9
>
> [1] https://www.kernel.org/doc/Documentation/memory-barriers.txt
> [2] https://www.kernel.org/doc/Documentation/atomic_t.txt
>
> Fixes: 65112709115f ("powerpc/bpf/64: add support for BPF_ATOMIC bitwise operations")
> Signed-off-by: Puranjay Mohan <[email protected]>
> ---
> Changes in v1 -> v2:
> v1: https://lore.kernel.org/all/[email protected]/
> - Don't emit `sync` for non-SMP kernels as that adds unessential overhead.
> ---
> arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++
> arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++
> 2 files changed, 24 insertions(+)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
> index 2f39c50ca729..0318b83f2e6a 100644
> --- a/arch/powerpc/net/bpf_jit_comp32.c
> +++ b/arch/powerpc/net/bpf_jit_comp32.c
> @@ -853,6 +853,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> /* Get offset into TMP_REG */
> EMIT(PPC_RAW_LI(tmp_reg, off));
> tmp_idx = ctx->idx * 4;
> + /*
> + * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
> + * before and after the operation.
> + *
> + * This is a requirement in the Linux Kernel Memory Model.
> + * See __cmpxchg_u64() in asm/cmpxchg.h as an example.

You mean __cmpxchg_u32() ?

> + */
> + if (imm & BPF_FETCH && IS_ENABLED(CONFIG_SMP))

Please enclose imm & BPF_FETCH in parentheses to improve readability.

> + EMIT(PPC_RAW_SYNC());

Do you really want to do it inside the loop on each try? When stwcx.
fails, it jumps back to tmp_idx, which now points at your new sync.

> /* load value from memory into r0 */
> EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
>
> @@ -905,6 +914,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
>
> /* For the BPF_FETCH variant, get old data into src_reg */
> if (imm & BPF_FETCH) {
> + /* Emit 'sync' to enforce full ordering */
> + if (IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());
> EMIT(PPC_RAW_MR(ret_reg, ax_reg));
> if (!fp->aux->verifier_zext)
> EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 79f23974a320..9a077f8acf7b 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -804,6 +804,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> /* Get offset into TMP_REG_1 */
> EMIT(PPC_RAW_LI(tmp1_reg, off));
> tmp_idx = ctx->idx * 4;
> + /*
> + * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
> + * before and after the operation.
> + *
> + * This is a requirement in the Linux Kernel Memory Model.
> + * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
> + */
> + if (imm & BPF_FETCH && IS_ENABLED(CONFIG_SMP))

Please enclose imm & BPF_FETCH in parentheses to improve readability.


> + EMIT(PPC_RAW_SYNC());

Same here: the sync should be outside the loop, I guess.

> /* load value from memory into TMP_REG_2 */
> if (size == BPF_DW)
> EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0));
> @@ -865,6 +874,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
> PPC_BCC_SHORT(COND_NE, tmp_idx);
>
> if (imm & BPF_FETCH) {
> + /* Emit 'sync' to enforce full ordering */
> + if (IS_ENABLED(CONFIG_SMP))
> + EMIT(PPC_RAW_SYNC());
> EMIT(PPC_RAW_MR(ret_reg, _R0));
> /*
> * Skip unnecessary zero-extension for 32-bit cmpxchg.
> --
> 2.42.0
>