2019-09-02 21:07:15

by Alexey Dobriyan

[permalink] [raw]
Subject: [PATCH] sched: make struct task_struct::state 32-bit

32-bit accesses are shorter than 64-bit accesses on x86_64.
Nothing uses 64-bitness of ->state.

Space savings are ~2KB on F30 kernel config.

Signed-off-by: Alexey Dobriyan <[email protected]>
---

arch/ia64/kernel/perfmon.c | 4 ++--
block/blk-mq.c | 2 +-
drivers/md/dm.c | 4 ++--
fs/userfaultfd.c | 2 +-
include/linux/sched.h | 6 +++---
include/linux/sched/debug.h | 2 +-
include/linux/sched/signal.h | 2 +-
kernel/freezer.c | 2 +-
kernel/kthread.c | 4 ++--
kernel/locking/mutex.c | 6 +++---
kernel/locking/semaphore.c | 2 +-
kernel/rcu/rcutorture.c | 4 ++--
kernel/rcu/tree_stall.h | 6 +++---
kernel/sched/core.c | 8 ++++----
lib/syscall.c | 2 +-
15 files changed, 28 insertions(+), 28 deletions(-)

--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2538,7 +2538,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
if (task == current) return 0;

if (!task_is_stopped_or_traced(task)) {
- DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
+ DPRINT(("cannot attach to non-stopped task [%d] state=%d\n", task_pid_nr(task), task->state));
return -EBUSY;
}
/*
@@ -4614,7 +4614,7 @@ pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
return 0;
}

- DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
+ DPRINT(("context %d state=%d [%d] task_state=%d must_stop=%d\n",
ctx->ctx_fd,
state,
task_pid_nr(task),
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3471,7 +3471,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
{
struct blk_mq_hw_ctx *hctx;
- long state;
+ int state;

if (!blk_qc_t_valid(cookie) ||
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2424,7 +2424,7 @@ void dm_put(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_put);

-static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+static int dm_wait_for_completion(struct mapped_device *md, int task_state)
{
int r = 0;
DEFINE_WAIT(wait);
@@ -2570,7 +2570,7 @@ static void unlock_fs(struct mapped_device *md)
* are being added to md->deferred list.
*/
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
- unsigned suspend_flags, long task_state,
+ unsigned suspend_flags, int task_state,
int dmf_suspended_flag)
{
bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -356,7 +356,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
struct userfaultfd_wait_queue uwq;
vm_fault_t ret = VM_FAULT_SIGBUS;
bool must_wait, return_to_userland;
- long blocking_state;
+ int blocking_state;

/*
* We don't do userfault handling for the final child pid update.
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -643,7 +643,7 @@ struct task_struct {
struct thread_info thread_info;
#endif
/* -1 unrunnable, 0 runnable, >0 stopped: */
- volatile long state;
+ volatile int state;

/*
* This begins the randomizable portion of task_struct. Only
@@ -1702,10 +1702,10 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);

#ifdef CONFIG_SMP
void scheduler_ipi(void);
-extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
+unsigned long wait_task_inactive(struct task_struct *, int match_state);
#else
static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+static inline unsigned long wait_task_inactive(struct task_struct *p, int match_state)
{
return 1;
}
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -14,7 +14,7 @@ extern void dump_cpu_task(int cpu);
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
-extern void show_state_filter(unsigned long state_filter);
+void show_state_filter(unsigned int state_filter);

static inline void show_state(void)
{
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -367,7 +367,7 @@ static inline int fatal_signal_pending(struct task_struct *p)
return signal_pending(p) && __fatal_signal_pending(p);
}

-static inline int signal_pending_state(long state, struct task_struct *p)
+static inline int signal_pending_state(int state, struct task_struct *p)
{
if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
return 0;
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -64,7 +64,7 @@ bool __refrigerator(bool check_kthr_stop)
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
bool was_frozen = false;
- long save = current->state;
+ int save = current->state;

pr_debug("%s entered refrigerator\n", current->comm);

--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -392,7 +392,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
}
EXPORT_SYMBOL(kthread_create_on_node);

-static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
+static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, int state)
{
unsigned long flags;

@@ -408,7 +408,7 @@ static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mas
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
}

-static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, int state)
{
__kthread_bind_mask(p, cpumask_of(cpu), state);
}
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -897,7 +897,7 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
* Lock a mutex (possibly interruptible), slowpath:
*/
static __always_inline int __sched
-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+__mutex_lock_common(struct mutex *lock, int state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
@@ -1071,14 +1071,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
}

static int __sched
-__mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+__mutex_lock(struct mutex *lock, int state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip)
{
return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
}

static int __sched
-__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+__ww_mutex_lock(struct mutex *lock, int state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx)
{
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -201,7 +201,7 @@ struct semaphore_waiter {
* constant, and thus optimised away by the compiler. Likewise the
* 'timeout' parameter for the cases without timeouts.
*/
-static inline int __sched __down_common(struct semaphore *sem, long state,
+static inline int __sched __down_common(struct semaphore *sem, int state,
long timeout)
{
struct semaphore_waiter waiter;
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1472,10 +1472,10 @@ rcu_torture_stats_print(void)
srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
&flags, &gp_seq);
wtp = READ_ONCE(writer_task);
- pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#lx cpu %d\n",
+ pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#x cpu %d\n",
rcu_torture_writer_state_getname(),
rcu_torture_writer_state, gp_seq, flags,
- wtp == NULL ? ~0UL : wtp->state,
+ wtp == NULL ? ~0U : wtp->state,
wtp == NULL ? -1 : (int)task_cpu(wtp));
if (!splatted && wtp) {
sched_show_task(wtp);
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -337,7 +337,7 @@ static void rcu_check_gp_kthread_starvation(void)

j = jiffies - READ_ONCE(rcu_state.gp_activity);
if (j > 2 * HZ) {
- pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
+ pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n",
rcu_state.name, j,
(long)rcu_seq_current(&rcu_state.gp_seq),
READ_ONCE(rcu_state.gp_flags),
@@ -559,10 +559,10 @@ void show_rcu_gp_kthreads(void)
ja = j - READ_ONCE(rcu_state.gp_activity);
jr = j - READ_ONCE(rcu_state.gp_req_activity);
jw = j - READ_ONCE(rcu_state.gp_wake_time);
- pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
+ pr_info("%s: wait state: %s(%d) ->state: %#x delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
rcu_state.name, gp_state_getname(rcu_state.gp_state),
rcu_state.gp_state,
- rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
+ rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffff,
ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
(long)READ_ONCE(rcu_state.gp_seq),
(long)READ_ONCE(rcu_get_root()->gp_seq_needed),
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1769,7 +1769,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p,
* smp_call_function() if an IPI is sent by the same process we are
* waiting to become inactive.
*/
-unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+unsigned long wait_task_inactive(struct task_struct *p, int match_state)
{
int running, queued;
struct rq_flags rf;
@@ -5761,7 +5761,7 @@ void sched_show_task(struct task_struct *p)
EXPORT_SYMBOL_GPL(sched_show_task);

static inline bool
-state_filter_match(unsigned long state_filter, struct task_struct *p)
+state_filter_match(unsigned int state_filter, struct task_struct *p)
{
/* no filter, everything matches */
if (!state_filter)
@@ -5782,7 +5782,7 @@ state_filter_match(unsigned long state_filter, struct task_struct *p)
}


-void show_state_filter(unsigned long state_filter)
+void show_state_filter(unsigned int state_filter)
{
struct task_struct *g, *p;

@@ -6553,7 +6553,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
*/
WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change,
"do not call blocking ops when !TASK_RUNNING; "
- "state=%lx set at [<%p>] %pS\n",
+ "state=%x set at [<%p>] %pS\n",
current->state,
(void *)current->task_state_change,
(void *)current->task_state_change);
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -61,7 +61,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info
*/
int task_current_syscall(struct task_struct *target, struct syscall_info *info)
{
- long state;
+ int state;
unsigned long ncsw;

if (target == current)


2019-09-02 23:05:15

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit

Hi,

On 02/09/2019 22:05, Alexey Dobriyan wrote:
> 32-bit accesses are shorter than 64-bit accesses on x86_64.
> Nothing uses 64-bitness of ->state.
>
> Space savings are ~2KB on F30 kernel config.
>
> Signed-off-by: Alexey Dobriyan <[email protected]>
> ---

Interestingly this has been volatile long since forever (or 2002, which is
my take on "forever" given the history tree), although the task states
seem to have never gone above 0x1000 (the current TASK_STATE_MAX).

[...]

> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -643,7 +643,7 @@ struct task_struct {
> struct thread_info thread_info;
> #endif
> /* -1 unrunnable, 0 runnable, >0 stopped: */
> - volatile long state;
> + volatile int state;

This leads to having some padding after this field (w/o randomization):

struct task_struct {
struct thread_info thread_info; /* 0 24 */
volatile int state; /* 24 4 */

/* XXX 4 bytes hole, try to pack */

void * stack; /* 32 8 */

Though seeing as this is also the boundary of the randomized layout we can't
really do much better without changing the boundary itself. So much for
cacheline use :/

Anyway, task_struct doesn't shrink but we can cut some corners in the asm,
I guess that's fine?

[...]

2019-09-03 06:53:04

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [dm-devel] [PATCH] sched: make struct task_struct::state 32-bit

On Tue, Sep 03, 2019 at 12:05:58AM +0300, Alexey Dobriyan wrote:
> 32-bit accesses are shorter than 64-bit accesses on x86_64.
> Nothing uses 64-bitness of ->state.
>
> Space savings are ~2KB on F30 kernel config.

I guess we'd save even more when moving from a volatile to
WRITE_ONCE/READ_ONCE..

2019-09-03 07:15:11

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [dm-devel] [PATCH] sched: make struct task_struct::state 32-bit

On Mon, Sep 02, 2019 at 11:51:55PM -0700, Christoph Hellwig wrote:
> On Tue, Sep 03, 2019 at 12:05:58AM +0300, Alexey Dobriyan wrote:
> > 32-bit accesses are shorter than 64-bit accesses on x86_64.
> > Nothing uses 64-bitness of ->state.
> >
> > Space savings are ~2KB on F30 kernel config.
>
> I guess we'd save even more when moving from a volatile to
> WRITE_ONCE/READ_ONCE..

I doubt it; pretty much all accesses really should be using that.

Not saying we shouldn't maybe do that; but that's going to be massive
churn.

2019-09-03 16:25:34

by Alexey Dobriyan

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit

On Tue, Sep 03, 2019 at 12:02:38AM +0100, Valentin Schneider wrote:
> struct task_struct {
> struct thread_info thread_info; /* 0 24 */
> volatile int state; /* 24 4 */
>
> /* XXX 4 bytes hole, try to pack */
>
> void * stack; /* 32 8 */
>
> Though seeing as this is also the boundary of the randomized layout we can't
> really do much better without changing the boundary itself. So much for
> cacheline use :/

Cacheline use of task_struct is pretty hopeless because of all the ifdefs.

2019-09-03 16:34:46

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit



On 03/09/2019 17:23, Alexey Dobriyan wrote:
> On Tue, Sep 03, 2019 at 12:02:38AM +0100, Valentin Schneider wrote:
>> struct task_struct {
>> struct thread_info thread_info; /* 0 24 */
>> volatile int state; /* 24 4 */
>>
>> /* XXX 4 bytes hole, try to pack */
>>
>> void * stack; /* 32 8 */
>>
>> Though seeing as this is also the boundary of the randomized layout we can't
>> really do much better without changing the boundary itself. So much for
>> cacheline use :/
>
> Cacheline use of task_struct is pretty hopeless because of all the ifdefs.
>

Yeah I figured, then had a minute of silence for those forsaken bytes.

2019-09-03 17:30:38

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit

On 02/09/2019 22:05, Alexey Dobriyan wrote:
> 32-bit accesses are shorter than 64-bit accesses on x86_64.
> Nothing uses 64-bitness of ->state.
>
> Space savings are ~2KB on F30 kernel config.
>
> Signed-off-by: Alexey Dobriyan <[email protected]>
> ---
>
> arch/ia64/kernel/perfmon.c | 4 ++--
> block/blk-mq.c | 2 +-
> drivers/md/dm.c | 4 ++--
> fs/userfaultfd.c | 2 +-
> include/linux/sched.h | 6 +++---
> include/linux/sched/debug.h | 2 +-
> include/linux/sched/signal.h | 2 +-
> kernel/freezer.c | 2 +-
> kernel/kthread.c | 4 ++--
> kernel/locking/mutex.c | 6 +++---
> kernel/locking/semaphore.c | 2 +-
> kernel/rcu/rcutorture.c | 4 ++--
> kernel/rcu/tree_stall.h | 6 +++---
> kernel/sched/core.c | 8 ++++----
> lib/syscall.c | 2 +-
> 15 files changed, 28 insertions(+), 28 deletions(-)
>

It looks like you missed a few places. There's a long prev_state in
sched/core.c::finish_task_switch() for instance.

I suppose that's where coccinelle oughta help but I'm really not fluent
in that. Is there a way to make it match p.state accesses with p task_struct?
And if so, can we make it change the type of the variable being read from
/ written to?

How did you come up with this changeset, did you pickaxe for some regexp?

[...]

2019-09-03 18:20:27

by Alexey Dobriyan

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit

On Tue, Sep 03, 2019 at 06:29:06PM +0100, Valentin Schneider wrote:
> On 02/09/2019 22:05, Alexey Dobriyan wrote:
> > 32-bit accesses are shorter than 64-bit accesses on x86_64.
> > Nothing uses 64-bitness of ->state.

> It looks like you missed a few places. There's a long prev_state in
> sched/core.c::finish_task_switch() for instance.
>
> I suppose that's where coccinelle oughta help but I'm really not fluent
> in that. Is there a way to make it match p.state accesses with p task_struct?
> And if so, can we make it change the type of the variable being read from
> / written to?

Coccinelle is interesting: basic

- foo
+ bar

doesn't find "foo" in function arguments.

I'm scared of coccinelle.

> How did you come up with this changeset, did you pickaxe for some regexp?

No, manually, backtracking up to the call chain.
Maybe I missed a few places.

2019-09-03 21:54:07

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit

On 03/09/2019 19:19, Alexey Dobriyan wrote:
> On Tue, Sep 03, 2019 at 06:29:06PM +0100, Valentin Schneider wrote:
>> On 02/09/2019 22:05, Alexey Dobriyan wrote:
>>> 32-bit accesses are shorter than 64-bit accesses on x86_64.
>>> Nothing uses 64-bitness of ->state.
>
>> It looks like you missed a few places. There's a long prev_state in
>> sched/core.c::finish_task_switch() for instance.
>>
>> I suppose that's where coccinelle oughta help but I'm really not fluent
>> in that. Is there a way to make it match p.state accesses with p task_struct?
>> And if so, can we make it change the type of the variable being read from
>> / written to?
>
> Coccinelle is interesting: basic
>
> - foo
> + bar
>
> doesn't find "foo" in function arguments.
>
> I'm scared of coccinelle.>

So am I, but I'm even more scared of having no other way of verifying this
than doing it "by hand". It's nothing critical here - just some variables
that will remain long instead of being int, but I'd like to have some way to
verify the change. A coccinelle script would be great, even if it misses
some places, I can at least have some trust in it.

If I have to verify the whole tree by hand, even with grep/ag, I don't trust
I'll do it right.


I gave Coccinelle a try, I think I got something for in-function variables:

---
@state_var@
struct task_struct *p;
identifier prev_state;
@@
prev_state = p->state

@@
identifier state_var.prev_state;
@@
- long prev_state;
+ int prev_state;
---

I tried something for function parameters, which seems to be feasible
according to [1], but couldn't get it to work (yet). Here's what I have
so far:

---
@func_param@
identifier func;
identifier p;
identifier state;
identifier mask;
@@

func(struct task_struct *p, const struct cpumask *mask, long state)
{
...
}

@@
identifier func_param.func;
identifier func_param.state;
expression E1;
expression E2;
@@

func(E1,
- long state,
+ int state,
E2)
---

(it should match against kernel/kthread.c::__kthread_bind_mask() but it
complains about me not knowing how to write coccinelle patches).

With a mix of these we might get somewhere...

[1]: http://coccinelle.lip6.fr/docs/main_grammar016.html

>> How did you come up with this changeset, did you pickaxe for some regexp?
>
> No, manually, backtracking up to the call chain.
> Maybe I missed a few places.
>

2019-09-04 09:44:44

by David Laight

[permalink] [raw]
Subject: RE: [PATCH] sched: make struct task_struct::state 32-bit

From: Alexey Dobriyan
> Sent: 03 September 2019 19:19
...
> > How did you come up with this changeset, did you pickaxe for some regexp?
>
> No, manually, backtracking up to the call chain.
> Maybe I missed a few places.

Renaming the structure field and getting the compiler to find all the uses can help.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2019-09-04 10:26:22

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit

On 04/09/2019 10:43, David Laight wrote:
> From: Alexey Dobriyan
>> Sent: 03 September 2019 19:19
> ...
>>> How did you come up with this changeset, did you pickaxe for some regexp?
>>
>> No, manually, backtracking up to the call chain.
>> Maybe I missed a few places.
>
> Renaming the structure field and getting the compiler to find all the uses can help.
>
> David
>

It's a good idea but sadly doesn't cover whatever the config doesn't
compile. A safer starting point could be

---
@state_var@
struct task_struct *p;
identifier var;
@@
(
var = p->state
|
p->state = var
)

@@
identifier state_var.var;
@@
- var
+ FIXME
---

But I'm hoping we can get something even better from coccinelle, stay
tuned...

> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
>

2019-09-04 12:08:41

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit

On 03/09/2019 22:51, Valentin Schneider wrote:
[...]
> I tried something for function parameters, which seems to be feasible
> according to [1], but couldn't get it to work (yet). Here's what I have
> so far:
>
[...]

So now I have this:

---
@funcmatch@
identifier func;
identifier p;
identifier state_var;
@@

func(..., struct task_struct *p, ...
- , long state_var
+ , int state_var
,...)
{
...
}
---

Which seems to be doing roughly what I want. I probably need another
version to cover functions with reverse parameter order, and also need
to make it match functions that assign state_var to p->state (or pass it
down to another function that might do it).

Baby steps...

2019-09-04 17:51:08

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH] sched: make struct task_struct::state 32-bit

On 04/09/2019 13:07, Valentin Schneider wrote:
> [...]
> Baby steps...


There's something regarding coccinelle disjunctions that just can't grasp,
and this also fails to recognize "current" as being "struct task_struct*".

Once I fix these, it's "just" a matter of finding out how to write a rule
for layered calls (e.g. __kthread_bind() -> __kthread_bind_mask() ->
wait_task_inactive()), and we should be close to having something somewhat
usable.

---
virtual patch
virtual report

@state_access@
identifier func;
struct task_struct *p;
identifier state_var;
position fpos;
position epos;
@@

func(...)@fpos
{
<...
(
p->state & state_var@epos
|
p->state | state_var@epos
|
p->state < state_var@epos
|
p->state > state_var@epos
|
state_var@epos = p->state
// For some reason adding this disjunction gives us more matches, but causes
// some to go away :/
// |
// p->state == state_var@epos
|
p->state != state_var@epos
)
...>
}

@depends on patch@
identifier fn = state_access.func;
identifier state_var = state_access.state_var;
@@

fn(...,
- long state_var
+ int state_var
,...)
{
...
}

// Should be merged in the above but can't get disjunction to work
@depends on patch@
identifier fn = state_access.func;
identifier state_var = state_access.state_var;
@@

fn(...,
- unsigned long state_var
+ unsigned int state_var
,...)
{
...
}

// Is it possible to match without semicolons? :/
@depends on patch@
identifier state_var = state_access.state_var;
expression E;
@@

(
- long state_var;
+ int state_var;
|
- long state_var = E;
+ int state_var = E;
)

@script:python depends on report@
fp << state_access.fpos;
ep << state_access.epos;
@@
cocci.print_main("Func at", fp)
cocci.print_main("Expr at", ep)

2019-09-05 20:50:56

by Markus Elfring

[permalink] [raw]
Subject: Re: sched: make struct task_struct::state 32-bit

> @funcmatch@
> identifier func;
> identifier p;
> identifier state_var;
> @@
>
> func(..., struct task_struct *p, ...
> - , long state_var
> + , int state_var
> ,...)
> {
> ...
> }

> Which seems to be doing roughly what I want.

Can a transformation approach like the following work also
for your software?

@replacement@

identifier func, p, state_var;

@@

func(...,
struct task_struct *p,
...
,
- long
+ int
state_var
,
...)

{

...

}



Regards,
Markus

2019-09-05 21:21:20

by Valentin Schneider

[permalink] [raw]
Subject: Re: sched: make struct task_struct::state 32-bit



On 05/09/2019 16:51, Markus Elfring wrote:
> Can a transformation approach like the following work also
> for your software?
>
> @replacement@
>
> identifier func, p, state_var;
>
> @@
>
> func(...,
> struct task_struct *p,
> ...
> ,
> - long
> + int
> state_var
> ,
> ...)
>
> {
>
> ...
>
> }
>
>

I actually got rid of the task_struct* parameter and now just match
against task_struct.p accesses in the function body, which has the
added bonus of not caring about the order of the parameters.

Still not there yet but making progress in the background, hope it's
passable entertainment to see me struggle my way there :)

>
> Regards,
> Markus
>

2019-09-25 11:02:24

by Valentin Schneider

[permalink] [raw]
Subject: Re: sched: make struct task_struct::state 32-bit

On 05/09/2019 17:52, Valentin Schneider wrote:
> I actually got rid of the task_struct* parameter and now just match
> against task_struct.p accesses in the function body, which has the
> added bonus of not caring about the order of the parameters.
>
> Still not there yet but making progress in the background, hope it's
> passable entertainment to see me struggle my way there :)
>

Bit of hiatus on my end there. I did play around some more with Coccinelle
on the way to/from Plumbers. The main problems I'm facing ATM is "current"
not being recognized as a task_struct* expression, and the need to
"recursively" match task_struct.state modifiers, i.e. catch both functions
for something like:

foo(long state)
{
__foo(state);
}

__foo(long state)
{
current->state = state;
}


Here's where I'm at:
---
virtual patch
virtual report

// Match variables that represent task states
// They can be read from / written to task_struct.state, or be compared
// to TASK_* values
@state_access@
struct task_struct *p;
// FIXME: current not recognized as task_struct*, fixhack with regexp
identifier current =~ "^current$";
identifier task_state =~ "^TASK_";
identifier state_var;
position pos;
@@

(
p->state & state_var@pos
|
current->state & state_var@pos
|
p->state | state_var@pos
|
current->state | state_var@pos
|
p->state < state_var@pos
|
current->state < state_var@pos
|
p->state > state_var@pos
|
current->state > state_var@pos
|
state_var@pos = p->state
|
state_var@pos = current->state
|
p->state == state_var@pos
|
current->state == state_var@pos
|
p->state != state_var@pos
|
current->state != state_var@pos
|
// FIXME: match functions that do something with state_var underneath?
// How to do recursive rules?
set_current_state(state_var@pos)
|
set_special_state(state_var@pos)
|
signal_pending_state(state_var@pos, p)
|
signal_pending_state(state_var@pos, current)
|
state_var@pos & task_state
|
state_var@pos | task_state
)

// Fixup local variables
@depends on patch && state_access@
identifier state_var = state_access.state_var;
@@
(
- long
+ int
|
- unsigned long
+ unsigned int
)
state_var;

// Fixup function parameters
@depends on patch && state_access@
identifier fn;
identifier state_var = state_access.state_var;
@@

fn(...,
- long state_var
+ int state_var
,...)
{
...
}

// FIXME: find a way to squash that with the above?
// Fixup function parameters
@depends on patch && state_access@
identifier fn;
identifier state_var = state_access.state_var;
@@

fn(...,
- unsigned long
+ unsigned int
state_var
,...)
{
...
}
---

This gives me the following diff on kernel/:

---
diff -u -p a/locking/mutex.c b/locking/mutex.c
--- a/locking/mutex.c
+++ b/locking/mutex.c
@@ -923,7 +923,7 @@ __ww_mutex_add_waiter(struct mutex_waite
* Lock a mutex (possibly interruptible), slowpath:
*/
static __always_inline int __sched
-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+__mutex_lock_common(struct mutex *lock, int state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
@@ -1097,14 +1097,14 @@ err_early_kill:
}

static int __sched
-__mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+__mutex_lock(struct mutex *lock, int state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip)
{
return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
}

static int __sched
-__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+__ww_mutex_lock(struct mutex *lock, int state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx)
{
diff -u -p a/locking/semaphore.c b/locking/semaphore.c
--- a/locking/semaphore.c
+++ b/locking/semaphore.c
@@ -201,7 +201,7 @@ struct semaphore_waiter {
* constant, and thus optimised away by the compiler. Likewise the
* 'timeout' parameter for the cases without timeouts.
*/
-static inline int __sched __down_common(struct semaphore *sem, long state,
+static inline int __sched __down_common(struct semaphore *sem, int state,
long timeout)
{
struct semaphore_waiter waiter;
diff -u -p a/freezer.c b/freezer.c
--- a/freezer.c
+++ b/freezer.c
@@ -64,7 +64,7 @@ bool __refrigerator(bool check_kthr_stop
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
bool was_frozen = false;
- long save = current->state;
+ int save = current->state;

pr_debug("%s entered refrigerator\n", current->comm);

diff -u -p a/sched/core.c b/sched/core.c
--- a/sched/core.c
+++ b/sched/core.c
@@ -1888,7 +1888,7 @@ out:
* smp_call_function() if an IPI is sent by the same process we are
* waiting to become inactive.
*/
-unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+unsigned long wait_task_inactive(struct task_struct *p, int match_state)
{
int running, queued;
struct rq_flags rf;
@@ -3185,7 +3185,7 @@ static struct rq *finish_task_switch(str
{
struct rq *rq = this_rq();
struct mm_struct *mm = rq->prev_mm;
- long prev_state;
+ int prev_state;

/*
* The previous task will have left us with a preempt_count of 2
@@ -5964,7 +5964,7 @@ void sched_show_task(struct task_struct
EXPORT_SYMBOL_GPL(sched_show_task);

static inline bool
-state_filter_match(unsigned long state_filter, struct task_struct *p)
+state_filter_match(unsigned int state_filter, struct task_struct *p)
{
/* no filter, everything matches */
if (!state_filter)
@@ -5985,7 +5985,7 @@ state_filter_match(unsigned long state_f
}


-void show_state_filter(unsigned long state_filter)
+void show_state_filter(unsigned int state_filter)
{
struct task_struct *g, *p;

---

2019-09-25 11:02:54

by Julia Lawall

[permalink] [raw]
Subject: Re: sched: make struct task_struct::state 32-bit



On Mon, 23 Sep 2019, Valentin Schneider wrote:

> On 05/09/2019 17:52, Valentin Schneider wrote:
> > I actually got rid of the task_struct* parameter and now just match
> > against task_struct.p accesses in the function body, which has the
> > added bonus of not caring about the order of the parameters.
> >
> > Still not there yet but making progress in the background, hope it's
> > passable entertainment to see me struggle my way there :)
> >
>
> Bit of hiatus on my end there. I did play around some more with Coccinelle
> on the way to/from Plumbers. The main problems I'm facing ATM is "current"
> not being recognized as a task_struct* expression, and the need to
> "recursively" match task_struct.state modifiers, i.e. catch both functions
> for something like:
>
> foo(long state)
> {
> __foo(state);
> }
>
> __foo(long state)
> {
> current->state = state;
> }
>
>
> Here's where I'm at:
> ---
> virtual patch
> virtual report
>
> // Match variables that represent task states
> // They can be read from / written to task_struct.state, or be compared
> // to TASK_* values
> @state_access@
> struct task_struct *p;
> // FIXME: current not recognized as task_struct*, fixhack with regexp
> identifier current =~ "^current$";

Please don't do this. Just use the word current. It doesn't have to be a
metavariable. You will though get a warning about it. To eliminate the
warning, you can say symbol current;

> identifier task_state =~ "^TASK_";

Are there a lot of options? You can also enumerate them in {}, ie

identifier task_state = {TASK_BLAH, TASK_BLAHBLAH};

> identifier state_var;
> position pos;
> @@
>
> (
> p->state & state_var@pos
> |
> current->state & state_var@pos
> |
> p->state | state_var@pos
> |
> current->state | state_var@pos
> |
> p->state < state_var@pos
> |
> current->state < state_var@pos
> |
> p->state > state_var@pos
> |
> current->state > state_var@pos
> |
> state_var@pos = p->state
> |
> state_var@pos = current->state
> |
> p->state == state_var@pos
> |
> current->state == state_var@pos
> |
> p->state != state_var@pos
> |
> current->state != state_var@pos
> |
> // FIXME: match functions that do something with state_var underneath?
> // How to do recursive rules?

You want to look at the definitions of called functions? Coccinelle
doesn't really support that, but there are hackish ways to add that. How
many function calls would you expect have to be unrolled?

> set_current_state(state_var@pos)
> |
> set_special_state(state_var@pos)
> |
> signal_pending_state(state_var@pos, p)
> |
> signal_pending_state(state_var@pos, current)
> |
> state_var@pos & task_state
> |
> state_var@pos | task_state
> )
>
> // Fixup local variables
> @depends on patch && state_access@
> identifier state_var = state_access.state_var;
> @@
> (
> - long
> + int
> |
> - unsigned long
> + unsigned int
> )
> state_var;
>
> // Fixup function parameters
> @depends on patch && state_access@
> identifier fn;
> identifier state_var = state_access.state_var;
> @@
>
> fn(...,
> - long state_var
> + int state_var
> ,...)
> {
> ...
> }
>
> // FIXME: find a way to squash that with the above?

I think that you can make a disjunction on a function parameter

fn(...,
(
- T1 x1
+ T2 x2
|
- T3 x3
+ T4 x4
)
, ...) { ... }

julia

> // Fixup function parameters
> @depends on patch && state_access@
> identifier fn;
> identifier state_var = state_access.state_var;
> @@
>
> fn(...,
> - unsigned long
> + unsigned int
> state_var
> ,...)
> {
> ...
> }
> ---
>
> This gives me the following diff on kernel/:
>
> ---
> diff -u -p a/locking/mutex.c b/locking/mutex.c
> --- a/locking/mutex.c
> +++ b/locking/mutex.c
> @@ -923,7 +923,7 @@ __ww_mutex_add_waiter(struct mutex_waite
> * Lock a mutex (possibly interruptible), slowpath:
> */
> static __always_inline int __sched
> -__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
> +__mutex_lock_common(struct mutex *lock, int state, unsigned int subclass,
> struct lockdep_map *nest_lock, unsigned long ip,
> struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
> {
> @@ -1097,14 +1097,14 @@ err_early_kill:
> }
>
> static int __sched
> -__mutex_lock(struct mutex *lock, long state, unsigned int subclass,
> +__mutex_lock(struct mutex *lock, int state, unsigned int subclass,
> struct lockdep_map *nest_lock, unsigned long ip)
> {
> return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
> }
>
> static int __sched
> -__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass,
> +__ww_mutex_lock(struct mutex *lock, int state, unsigned int subclass,
> struct lockdep_map *nest_lock, unsigned long ip,
> struct ww_acquire_ctx *ww_ctx)
> {
> diff -u -p a/locking/semaphore.c b/locking/semaphore.c
> --- a/locking/semaphore.c
> +++ b/locking/semaphore.c
> @@ -201,7 +201,7 @@ struct semaphore_waiter {
> * constant, and thus optimised away by the compiler. Likewise the
> * 'timeout' parameter for the cases without timeouts.
> */
> -static inline int __sched __down_common(struct semaphore *sem, long state,
> +static inline int __sched __down_common(struct semaphore *sem, int state,
> long timeout)
> {
> struct semaphore_waiter waiter;
> diff -u -p a/freezer.c b/freezer.c
> --- a/freezer.c
> +++ b/freezer.c
> @@ -64,7 +64,7 @@ bool __refrigerator(bool check_kthr_stop
> /* Hmm, should we be allowed to suspend when there are realtime
> processes around? */
> bool was_frozen = false;
> - long save = current->state;
> + int save = current->state;
>
> pr_debug("%s entered refrigerator\n", current->comm);
>
> diff -u -p a/sched/core.c b/sched/core.c
> --- a/sched/core.c
> +++ b/sched/core.c
> @@ -1888,7 +1888,7 @@ out:
> * smp_call_function() if an IPI is sent by the same process we are
> * waiting to become inactive.
> */
> -unsigned long wait_task_inactive(struct task_struct *p, long match_state)
> +unsigned long wait_task_inactive(struct task_struct *p, int match_state)
> {
> int running, queued;
> struct rq_flags rf;
> @@ -3185,7 +3185,7 @@ static struct rq *finish_task_switch(str
> {
> struct rq *rq = this_rq();
> struct mm_struct *mm = rq->prev_mm;
> - long prev_state;
> + int prev_state;
>
> /*
> * The previous task will have left us with a preempt_count of 2
> @@ -5964,7 +5964,7 @@ void sched_show_task(struct task_struct
> EXPORT_SYMBOL_GPL(sched_show_task);
>
> static inline bool
> -state_filter_match(unsigned long state_filter, struct task_struct *p)
> +state_filter_match(unsigned int state_filter, struct task_struct *p)
> {
> /* no filter, everything matches */
> if (!state_filter)
> @@ -5985,7 +5985,7 @@ state_filter_match(unsigned long state_f
> }
>
>
> -void show_state_filter(unsigned long state_filter)
> +void show_state_filter(unsigned int state_filter)
> {
> struct task_struct *g, *p;
>
> ---
>

2019-09-26 00:51:42

by Markus Elfring

[permalink] [raw]
Subject: Re: sched: make struct task_struct::state 32-bit

> // FIXME: current not recognized as task_struct*, fixhack with regexp
> identifier current =~ "^current$";

Would you really like to use a regular expression for finding a single word?


> identifier state_var;
> position pos;
> @@
>
> (
> p->state & state_var@pos
> |
> current->state & state_var@pos
> |

I see further opportunities to make such a SmPL disjunction more succinct.

*
( ( \( p \| current \) ) -> state & state_var@pos
|


* How do you think about to work with a SmPL constraint
for a metavariable with the type “binary operator”?


> set_current_state(state_var@pos)
> |
> set_special_state(state_var@pos)

| \( set_current_state \| set_special_state \) (state_var@pos)


> |
> signal_pending_state(state_var@pos, p)
> |
> signal_pending_state(state_var@pos, current)

| signal_pending_state(state_var@pos, \( p \| current \) )


Regards,
Markus