----- On Aug 26, 2020, at 7:02 PM, Peter Oskolkov [email protected] wrote:
[...]
>
> static void ipi_mb(void *info)
> {
> +#ifdef CONFIG_RSEQ
> + int *flags = info;
> +
> + if (flags && (*flags == MEMBARRIER_FLAG_RSEQ))
> + rseq_preempt(current);
> +#endif
Please lift this into a new ipi_rseq(), which will be defined as an empty function
if RSEQ is not defined.
> smp_mb(); /* IPIs should be serializing but paranoid. */
> }
>
> @@ -129,19 +143,26 @@ static int membarrier_global_expedited(void)
> return 0;
> }
>
> -static int membarrier_private_expedited(int flags)
> +static int membarrier_private_expedited(int flags, int cpu_id)
> {
> int cpu;
> cpumask_var_t tmpmask;
> struct mm_struct *mm = current->mm;
>
> - if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
> + if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
I'm not sure why we need to change the behavior from a mask on flags to
an equality, which means this behaves more like a list of items rather
than flags.
It's one thing to disallow combining things like SYNC_CORE and RSEQ in the
ABI, but I wonder why we need to change the flags behavior to an equality
for the internal flags.
> if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
> return -EINVAL;
> if (!(atomic_read(&mm->membarrier_state) &
> MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
> return -EPERM;
> + } else if (flags == MEMBARRIER_FLAG_RSEQ) {
> + if (!IS_ENABLED(CONFIG_RSEQ))
> + return -EINVAL;
> + if (!(atomic_read(&mm->membarrier_state) &
> + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY))
> + return -EPERM;
> } else {
> + BUG_ON(flags != 0);
> if (!(atomic_read(&mm->membarrier_state) &
> MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
> return -EPERM;
> @@ -174,6 +195,8 @@ static int membarrier_private_expedited(int flags)
> */
> if (cpu == raw_smp_processor_id())
> continue;
> + if (cpu_id >= 0 && cpu != cpu_id)
> + continue;
When the cpu is specified, it seems rather inefficient to iterate on all
cpus to skip all but the one we are looking for. I suspect we don't want
to go through the loop in that case.
> p = rcu_dereference(cpu_rq(cpu)->curr);
> if (p && p->mm == mm)
> __cpumask_set_cpu(cpu, tmpmask);
> @@ -181,7 +204,7 @@ static int membarrier_private_expedited(int flags)
> rcu_read_unlock();
>
> preempt_disable();
> - smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
> + smp_call_function_many(tmpmask, ipi_mb, &flags, 1);
> preempt_enable();
>
> free_cpumask_var(tmpmask);
> @@ -283,11 +306,18 @@ static int membarrier_register_private_expedited(int
> flags)
> set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
> ret;
>
> - if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
> + if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
Same comment about changing this internal flags behavior from mask to equality.
> if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
> return -EINVAL;
> ready_state =
> MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
> + } else if (flags == MEMBARRIER_FLAG_RSEQ) {
> + if (!IS_ENABLED(CONFIG_RSEQ))
> + return -EINVAL;
> + ready_state =
> + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY;
> + } else {
> + BUG_ON(flags != 0);
> }
>
> /*
> @@ -299,6 +329,8 @@ static int membarrier_register_private_expedited(int flags)
> return 0;
> if (flags & MEMBARRIER_FLAG_SYNC_CORE)
> set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
> + if (flags & MEMBARRIER_FLAG_RSEQ)
> + set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ;
This one still behaves like a mask, so there is a discrepancy between registration
and action functions.
> atomic_or(set_state, &mm->membarrier_state);
> ret = sync_runqueues_membarrier_state(mm);
> if (ret)
> @@ -310,8 +342,15 @@ static int membarrier_register_private_expedited(int flags)
>
> /**
> * sys_membarrier - issue memory barriers on a set of threads
> - * @cmd: Takes command values defined in enum membarrier_cmd.
> - * @flags: Currently needs to be 0. For future extensions.
> + * @cmd: Takes command values defined in enum membarrier_cmd.
> + * @flags: Currently needs to be 0 for all commands other than
> + * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: in the latter
> + * case it can be MEMBARRIER_CMD_FLAG_CPU, indicating that @cpu_id
> + * contains the CPU on which to interrupt (= restart)
> + * the RSEQ critical section.
> + * @cpu_id: if @flags == MEMBARRIER_CMD_FLAG_CPU, indicates the cpu on which
> + * RSEQ CS should be interrupted (@cmd must be
> + * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ).
> *
> * If this system call is not implemented, -ENOSYS is returned. If the
> * command specified does not exist, not available on the running
> @@ -337,9 +376,9 @@ static int membarrier_register_private_expedited(int flags)
> * smp_mb() X O O
> * sys_membarrier() O O O
> */
> -SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
> +SYSCALL_DEFINE3(membarrier, int, cmd, int, flags, int, cpu_id)
> {
> - if (unlikely(flags))
> + if (unlikely(flags) && cmd != MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ)
I would prefer that we deal with flags and cpu_id entirely here rather than
half here, half below, with e.g.:
switch (cmd) {
case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
if (unlikely(flags && flags != MEMBARRIER_CMD_FLAG_CPU))
return -EINVAL;
break;
default:
if (unlikely(flags))
return -EINVAL;
}
if (!(flags & MEMBARRIER_CMD_FLAG_CPU))
cpu_id = -1;
> return -EINVAL;
> switch (cmd) {
> case MEMBARRIER_CMD_QUERY:
> @@ -362,13 +401,21 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
> case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
> return membarrier_register_global_expedited();
> case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
> - return membarrier_private_expedited(0);
> + return membarrier_private_expedited(0, -1);
We can then change the -1 for cpu_id here.
> case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
> return membarrier_register_private_expedited(0);
> case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
> - return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
> + return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE, -1);
And here.
> case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
> return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
> + case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
> + if (flags == 0)
> + return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, -1);
> + if (flags == MEMBARRIER_CMD_FLAG_CPU)
> + return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, cpu_id);
> + return -EINVAL;
and here we can just call:
return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, cpu_id);
Thanks,
Mathieu
> + case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
> + return membarrier_register_private_expedited(MEMBARRIER_FLAG_RSEQ);
> default:
> return -EINVAL;
> }
> --
> 2.28.0.297.g1956fa8f8d-goog
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
On Mon, Aug 31, 2020 at 8:35 AM Mathieu Desnoyers
<[email protected]> wrote:
>
Thanks for the review!
>
> ----- On Aug 26, 2020, at 7:02 PM, Peter Oskolkov [email protected] wrote:
> [...]
> >
> > static void ipi_mb(void *info)
> > {
> > +#ifdef CONFIG_RSEQ
> > + int *flags = info;
> > +
> > + if (flags && (*flags == MEMBARRIER_FLAG_RSEQ))
> > + rseq_preempt(current);
> > +#endif
>
> Please lift this into a new ipi_rseq(), which will be defined as an empty function
> if RSEQ is not defined.
Done.
>
>
> > smp_mb(); /* IPIs should be serializing but paranoid. */
> > }
> >
> > @@ -129,19 +143,26 @@ static int membarrier_global_expedited(void)
> > return 0;
> > }
> >
> > -static int membarrier_private_expedited(int flags)
> > +static int membarrier_private_expedited(int flags, int cpu_id)
> > {
> > int cpu;
> > cpumask_var_t tmpmask;
> > struct mm_struct *mm = current->mm;
> >
> > - if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
> > + if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
>
> I'm not sure why we need to change the behavior from a mask on flags to
> an equality, which means this behaves more like a list of items rather
> than flags.
>
> It's one thing to disallow combining things like SYNC_CORE and RSEQ in the
> ABI, but I wonder why we need to change the flags behavior to an equality
> for the internal flags.
I do not feel too strongly about this, but using "flags & XXX" implies
that flags is a bitmask that can have more than one bit set. I was actually
confused initially by this and was trying to figure out where / how more than
one bit can be set, and where / how this is handled. By explicitly using "=="
the code indicates that (at the moment) this is not a bitmask.
I can revert the change back to "&" if you think it is better than having "==".
>
> > if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
> > return -EINVAL;
> > if (!(atomic_read(&mm->membarrier_state) &
> > MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
> > return -EPERM;
> > + } else if (flags == MEMBARRIER_FLAG_RSEQ) {
> > + if (!IS_ENABLED(CONFIG_RSEQ))
> > + return -EINVAL;
> > + if (!(atomic_read(&mm->membarrier_state) &
> > + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY))
> > + return -EPERM;
> > } else {
> > + BUG_ON(flags != 0);
> > if (!(atomic_read(&mm->membarrier_state) &
> > MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
> > return -EPERM;
> > @@ -174,6 +195,8 @@ static int membarrier_private_expedited(int flags)
> > */
> > if (cpu == raw_smp_processor_id())
> > continue;
> > + if (cpu_id >= 0 && cpu != cpu_id)
> > + continue;
>
> When the cpu is specified, it seems rather inefficient to iterate on all
> cpus to skip all but the one we are looking for. I suspect we don't want
> to go through the loop in that case.
Done. The code is a bit more complicated now, but definitely more
efficient.
>
> > p = rcu_dereference(cpu_rq(cpu)->curr);
> > if (p && p->mm == mm)
> > __cpumask_set_cpu(cpu, tmpmask);
> > @@ -181,7 +204,7 @@ static int membarrier_private_expedited(int flags)
> > rcu_read_unlock();
> >
> > preempt_disable();
> > - smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
> > + smp_call_function_many(tmpmask, ipi_mb, &flags, 1);
> > preempt_enable();
> >
> > free_cpumask_var(tmpmask);
> > @@ -283,11 +306,18 @@ static int membarrier_register_private_expedited(int
> > flags)
> > set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
> > ret;
> >
> > - if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
> > + if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
>
> Same comment about changing this internal flags behavior from mask to equality.
Same reply :)
I can revert the change, but it will look weird, imho - the code does not
treat flags as a bitmask, and changing it to actually work with flags as a
bitmask will make it more complicated without a real use case at the moment.
>
> > if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
> > return -EINVAL;
> > ready_state =
> > MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
> > + } else if (flags == MEMBARRIER_FLAG_RSEQ) {
> > + if (!IS_ENABLED(CONFIG_RSEQ))
> > + return -EINVAL;
> > + ready_state =
> > + MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY;
> > + } else {
> > + BUG_ON(flags != 0);
> > }
> >
> > /*
> > @@ -299,6 +329,8 @@ static int membarrier_register_private_expedited(int flags)
> > return 0;
> > if (flags & MEMBARRIER_FLAG_SYNC_CORE)
> > set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
> > + if (flags & MEMBARRIER_FLAG_RSEQ)
> > + set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ;
>
> This one still behaves like a mask, so there is a discrepancy between registration
> and action functions.
Yes, and I vaguely remember you saying that commands being distinct bits
is for "discoverability", not for any "ORing" of commands at the moment.
[...]
> > +SYSCALL_DEFINE3(membarrier, int, cmd, int, flags, int, cpu_id)
> > {
> > - if (unlikely(flags))
> > + if (unlikely(flags) && cmd != MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ)
>
> I would prefer that we deal with flags and cpu_id entirely here rather than
> half here, half below, with e.g.:
>
> switch (cmd) {
> case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
> if (unlikely(flags && flags != MEMBARRIER_CMD_FLAG_CPU))
> return -EINVAL;
> break;
> default:
> if (unlikely(flags))
> return -EINVAL;
> }
>
> if (!(flags & MEMBARRIER_CMD_FLAG_CPU))
> cpu_id = -1;
Done.
[...]