2003-01-13 07:16:41

by Rusty Russell

[permalink] [raw]
Subject: [PATCH] __cacheline_aligned_in_smp?

Dave: Anton suggested you might have a justification for
__cacheline_aligned doing something on UP?

I think I'd prefer __cacheline_aligned to be the same as
__cacheline_aligned_in_smp, and have a new __cacheline_aligned_always
for those who REALLY want it (if any).

Thoughts?
Rusty.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/drivers/cdrom/sbpcd.c working-2.5-bk-cacheline-nosmp/drivers/cdrom/sbpcd.c
--- linux-2.5-bk/drivers/cdrom/sbpcd.c 2003-01-02 12:45:18.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/drivers/cdrom/sbpcd.c 2003-01-13 18:19:08.000000000 +1100
@@ -462,7 +462,7 @@ static int sbpcd[] =
/*
* Protects access to global structures etc.
*/
-static spinlock_t sbpcd_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
+static spinlock_t sbpcd_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
static struct request_queue sbpcd_queue;

MODULE_PARM(sbpcd, "2i");
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/drivers/char/random.c working-2.5-bk-cacheline-nosmp/drivers/char/random.c
--- linux-2.5-bk/drivers/char/random.c 2003-01-02 14:47:58.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/drivers/char/random.c 2003-01-13 18:18:57.000000000 +1100
@@ -2050,7 +2050,7 @@ static struct keydata {
time_t rekey_time;
__u32 count; // already shifted to the final position
__u32 secret[12];
-} ____cacheline_aligned ip_keydata[2];
+} ____cacheline_aligned_in_smp ip_keydata[2];

static spinlock_t ip_lock = SPIN_LOCK_UNLOCKED;
static unsigned int ip_cnt;
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/drivers/oprofile/cpu_buffer.c working-2.5-bk-cacheline-nosmp/drivers/oprofile/cpu_buffer.c
--- linux-2.5-bk/drivers/oprofile/cpu_buffer.c 2003-01-02 12:47:01.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/drivers/oprofile/cpu_buffer.c 2003-01-13 18:19:21.000000000 +1100
@@ -24,7 +24,7 @@
#include "cpu_buffer.h"
#include "oprof.h"

-struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;
+struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned_in_smp;

static unsigned long buffer_size;

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/drivers/oprofile/cpu_buffer.h working-2.5-bk-cacheline-nosmp/drivers/oprofile/cpu_buffer.h
--- linux-2.5-bk/drivers/oprofile/cpu_buffer.h 2003-01-02 12:45:22.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/drivers/oprofile/cpu_buffer.h 2003-01-13 18:19:50.000000000 +1100
@@ -39,7 +39,7 @@ struct oprofile_cpu_buffer {
unsigned long sample_lost_locked;
unsigned long sample_lost_overflow;
unsigned long sample_lost_task_exit;
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;

extern struct oprofile_cpu_buffer cpu_buffer[];

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/drivers/scsi/scsi.c working-2.5-bk-cacheline-nosmp/drivers/scsi/scsi.c
--- linux-2.5-bk/drivers/scsi/scsi.c 2003-01-02 12:47:02.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/drivers/scsi/scsi.c 2003-01-13 18:19:03.000000000 +1100
@@ -104,7 +104,7 @@ struct softscsi_data {
Scsi_Cmnd *tail;
};

-static struct softscsi_data softscsi_data[NR_CPUS] __cacheline_aligned;
+static struct softscsi_data softscsi_data[NR_CPUS] __cacheline_aligned_in_smp;

/*
* List of all highlevel drivers.
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/include/linux/dcache.h working-2.5-bk-cacheline-nosmp/include/linux/dcache.h
--- linux-2.5-bk/include/linux/dcache.h 2003-01-02 12:36:08.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/include/linux/dcache.h 2003-01-13 18:17:28.000000000 +1100
@@ -89,7 +89,7 @@ struct dentry {
void * d_fsdata; /* fs-specific data */
struct dcookie_struct * d_cookie; /* cookie, if any */
unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;

#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/include/linux/module.h working-2.5-bk-cacheline-nosmp/include/linux/module.h
--- linux-2.5-bk/include/linux/module.h 2003-01-13 16:56:29.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/include/linux/module.h 2003-01-13 18:17:25.000000000 +1100
@@ -155,7 +155,7 @@ void *__symbol_get_gpl(const char *symbo
struct module_ref
{
atomic_t count;
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;

enum module_state
{
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/include/linux/netdevice.h working-2.5-bk-cacheline-nosmp/include/linux/netdevice.h
--- linux-2.5-bk/include/linux/netdevice.h 2003-01-10 10:55:43.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/include/linux/netdevice.h 2003-01-13 18:17:22.000000000 +1100
@@ -163,7 +163,7 @@ struct netif_rx_stats
unsigned fastroute_deferred_out;
unsigned fastroute_latency_reduction;
unsigned cpu_collision;
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;

extern struct netif_rx_stats netdev_rx_stat[];

@@ -508,7 +508,7 @@ struct softnet_data
struct sk_buff *completion_queue;

struct net_device backlog_dev; /* Sorry. 8) */
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;


extern struct softnet_data softnet_data[NR_CPUS];
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/include/linux/netfilter_bridge/ebtables.h working-2.5-bk-cacheline-nosmp/include/linux/netfilter_bridge/ebtables.h
--- linux-2.5-bk/include/linux/netfilter_bridge/ebtables.h 2003-01-02 12:32:48.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/include/linux/netfilter_bridge/ebtables.h 2003-01-13 18:17:52.000000000 +1100
@@ -243,7 +243,7 @@ struct ebt_table_info
// room to maintain the stack used for jumping from and into udc
struct ebt_chainstack **chainstack;
char *entries;
- struct ebt_counter counters[0] ____cacheline_aligned;
+ struct ebt_counter counters[0] ____cacheline_aligned_in_smp;
};

struct ebt_table
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/kernel/acct.c working-2.5-bk-cacheline-nosmp/kernel/acct.c
--- linux-2.5-bk/kernel/acct.c 2003-01-02 14:48:01.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/kernel/acct.c 2003-01-13 18:16:51.000000000 +1100
@@ -83,7 +83,7 @@ struct acct_glbs {
struct timer_list timer;
};

-static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED};
+static struct acct_glbs acct_globals __cacheline_aligned_in_smp = {SPIN_LOCK_UNLOCKED};

/*
* Called whenever the timer says to check the free space.
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/kernel/fork.c working-2.5-bk-cacheline-nosmp/kernel/fork.c
--- linux-2.5-bk/kernel/fork.c 2003-01-13 16:56:30.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/kernel/fork.c 2003-01-13 18:16:36.000000000 +1100
@@ -48,14 +48,14 @@ int nr_threads;
int max_threads;
unsigned long total_forks; /* Handle normal Linux uptimes. */

-rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
+rwlock_t tasklist_lock __cacheline_aligned_in_smp = RW_LOCK_UNLOCKED; /* outer */

/*
* A per-CPU task cache - this relies on the fact that
* the very last portion of sys_exit() is executed with
* preemption turned off.
*/
-static task_t *task_cache[NR_CPUS] __cacheline_aligned;
+static task_t *task_cache[NR_CPUS] __cacheline_aligned_in_smp;

void __put_task_struct(struct task_struct *tsk)
{
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/kernel/sched.c working-2.5-bk-cacheline-nosmp/kernel/sched.c
--- linux-2.5-bk/kernel/sched.c 2003-01-13 16:56:30.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/kernel/sched.c 2003-01-13 18:16:16.000000000 +1100
@@ -158,9 +158,9 @@ struct runqueue {
struct list_head migration_queue;

atomic_t nr_iowait;
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;

-static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
+static struct runqueue runqueues[NR_CPUS] __cacheline_aligned_in_smp;

#define cpu_rq(cpu) (runqueues + (cpu))
#define this_rq() cpu_rq(smp_processor_id())
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/kernel/softirq.c working-2.5-bk-cacheline-nosmp/kernel/softirq.c
--- linux-2.5-bk/kernel/softirq.c 2003-01-02 12:32:49.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/kernel/softirq.c 2003-01-13 18:16:39.000000000 +1100
@@ -32,7 +32,7 @@
- Tasklets: serialized wrt itself.
*/

-irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
+irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned_in_smp;

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/kernel/workqueue.c working-2.5-bk-cacheline-nosmp/kernel/workqueue.c
--- linux-2.5-bk/kernel/workqueue.c 2003-01-02 12:29:33.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/kernel/workqueue.c 2003-01-13 18:16:55.000000000 +1100
@@ -42,7 +42,7 @@ struct cpu_workqueue_struct {
task_t *thread;
struct completion exit;

-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;

/*
* The externally visible workqueue abstraction is an array of
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/mm/rmap.c working-2.5-bk-cacheline-nosmp/mm/rmap.c
--- linux-2.5-bk/mm/rmap.c 2003-01-10 10:55:43.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/mm/rmap.c 2003-01-13 18:17:14.000000000 +1100
@@ -50,7 +50,7 @@
struct pte_chain {
struct pte_chain *next;
pte_addr_t ptes[NRPTE];
-} ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;

kmem_cache_t *pte_chain_cache;

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/net/core/dev.c working-2.5-bk-cacheline-nosmp/net/core/dev.c
--- linux-2.5-bk/net/core/dev.c 2003-01-13 16:56:30.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/net/core/dev.c 2003-01-13 18:18:32.000000000 +1100
@@ -194,7 +194,7 @@ static struct notifier_block *netdev_cha
* Device drivers call our routines to queue packets here. We empty the
* queue in the local softnet handler.
*/
-struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
+struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned_in_smp;

#ifdef CONFIG_NET_FASTROUTE
int netdev_fastroute;
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/net/ipv4/netfilter/ip_tables.c working-2.5-bk-cacheline-nosmp/net/ipv4/netfilter/ip_tables.c
--- linux-2.5-bk/net/ipv4/netfilter/ip_tables.c 2003-01-02 12:26:22.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/net/ipv4/netfilter/ip_tables.c 2003-01-13 18:18:48.000000000 +1100
@@ -97,7 +97,7 @@ struct ipt_table_info
unsigned int underflow[NF_IP_NUMHOOKS];

/* ipt_entry tables: one per CPU */
- char entries[0] ____cacheline_aligned;
+ char entries[0] ____cacheline_aligned_in_smp;
};

static LIST_HEAD(ipt_target);
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/net/ipv4/tcp_ipv4.c working-2.5-bk-cacheline-nosmp/net/ipv4/tcp_ipv4.c
--- linux-2.5-bk/net/ipv4/tcp_ipv4.c 2003-01-11 14:44:41.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/net/ipv4/tcp_ipv4.c 2003-01-13 18:18:42.000000000 +1100
@@ -85,7 +85,7 @@ static struct socket *tcp_socket;
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb);

-struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
+struct tcp_hashinfo __cacheline_aligned_in_smp tcp_hashinfo = {
.__tcp_lhash_lock = RW_LOCK_UNLOCKED,
.__tcp_lhash_users = ATOMIC_INIT(0),
.__tcp_lhash_wait
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5-bk/net/ipv4/xfrm_policy.c working-2.5-bk-cacheline-nosmp/net/ipv4/xfrm_policy.c
--- linux-2.5-bk/net/ipv4/xfrm_policy.c 2003-01-11 14:44:41.000000000 +1100
+++ working-2.5-bk-cacheline-nosmp/net/ipv4/xfrm_policy.c 2003-01-13 18:18:52.000000000 +1100
@@ -50,7 +50,7 @@ static inline u32 flow_hash(struct flowi
static int flow_lwm = 2*FLOWCACHE_HASH_SIZE;
static int flow_hwm = 4*FLOWCACHE_HASH_SIZE;

-static int flow_number[NR_CPUS] __cacheline_aligned;
+static int flow_number[NR_CPUS] __cacheline_aligned_in_smp;

#define flow_count(cpu) (flow_number[cpu])


--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.


2003-01-13 07:36:32

by Jeff Garzik

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

On Mon, Jan 13, 2003 at 06:24:40PM +1100, Rusty Russell wrote:
> Dave: Anton suggested you might have a justification for
> __cacheline_aligned doing something on UP?
>
> I think I'd prefer __cacheline_aligned to be the same as
> __cacheline_aligned_in_smp, and have a new __cacheline_aligned_always
> for those who REALLY want it (if any).

See the recent thread on tg3 and cacheline_aligned for David's
description... I and one other did some performance measurements and
____cacheline_aligned proved useful even on UP...

sigh. I wish I had caught you on IRC.

Don't you think changing the meaning of cacheline_aligned, and adding a
new __cacheline_aligned_always to mean what it used to, is completely
pointless churn??

Jeff



2003-01-13 07:35:49

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

From: Rusty Russell <[email protected]>
Date: Mon, 13 Jan 2003 18:24:40 +1100

Dave: Anton suggested you might have a justification for
__cacheline_aligned doing something on UP?

I think I'd prefer __cacheline_aligned to be the same as
__cacheline_aligned_in_smp, and have a new __cacheline_aligned_always
for those who REALLY want it (if any).

I think things like oprofile_buffer really want it always.

-struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
+struct tcp_hashinfo __cacheline_aligned_in_smp tcp_hashinfo = {

This definitely too.

All of the submembers are placed at cacheline boundaries, which
helps even on UP. If I meant cacheline aligned on SMP I would
have used the corresponding macro :)

2003-01-13 09:36:54

by David Gibson

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

On Mon, Jan 13, 2003 at 06:24:40PM +1100, Paul 'Rusty' Russell wrote:
> Dave: Anton suggested you might have a justification for
> __cacheline_aligned doing something on UP?

It could matter for DMA buffers on UP machines with non-cache coherent
DMA.

--
David Gibson | For every complex problem there is a
[email protected] | solution which is simple, neat and
| wrong.
http://www.ozlabs.org/people/dgibson

2003-01-14 02:46:19

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

In message <[email protected]> you write:
> On Mon, Jan 13, 2003 at 06:24:40PM +1100, Rusty Russell wrote:
> > Dave: Anton suggested you might have a justification for
> > __cacheline_aligned doing something on UP?
> >
> > I think I'd prefer __cacheline_aligned to be the same as
> > __cacheline_aligned_in_smp, and have a new __cacheline_aligned_always
> > for those who REALLY want it (if any).
>
> See the recent thread on tg3 and cacheline_aligned for David's
> description... I and one other did some performance measurements and
> ____cacheline_aligned proved useful even on UP...

Thanks, finally found the thread.

> sigh. I wish I had caught you on IRC.

<sniff> I miss you too Jeff... 8)

> Don't you think changing the meaning of cacheline_aligned, and adding a
> new __cacheline_aligned_always to mean what it used to, is completely
> pointless churn??

Agreed. Now I understand that it's also possibly useful on UP, not
just programmer laziness, like it is in the module.h case 8)

Of course, it'd be nice to see benchmarks to justify each of these,
but wouldn't it always?

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-01-14 02:46:02

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

On Sun, 12 Jan 2003 23:35:13 -0800 (PST)
"David S. Miller" <[email protected]> wrote:

> All of the submembers are placed at cacheline boundaries, which
> helps even on UP. If I meant cacheline aligned on SMP I would
> have used the corresponding macro :)

Hmm, you really want to weakly align it: you don't care if something follows it on
the cacheline, (ie. don't make it into an array, but it'd be nice if other
things could share the cacheline) in UP.

I don't think there's a way of doing that short of using asm?

It'd be nice if someone volunteered benchmarks. struct tcp_hashinfo takes
*two* whole cachelines, for example:

> -struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = {
> +struct tcp_hashinfo __cacheline_aligned_in_smp tcp_hashinfo = {
>
> This definitely too.

The decl already puts the non-read-heavy members __cacheline_aligned:

extern struct tcp_hashinfo {
...
rwlock_t __tcp_lhash_lock ____cacheline_aligned;

*This* should probably be ____cacheline_aligned_in_smp, yes?

Thanks for the reply!
Rusty.
--
there are those who do and those who hang on and you don't see too
many doers quoting their contemporaries. -- Larry McVoy

2003-01-14 06:33:36

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

From: Rusty Russell <[email protected]>
Date: Tue, 14 Jan 2003 12:10:12 +1100

Hmm, you really want to weakly align it: you don't care if something follows it on
the cacheline, (ie. don't make it into an array, but it'd be nice if other
things could share the cacheline) in UP.

No, that is an incorrect statement.

I want the rest of the cacheline to be absent of any write-possible
data. There are many members in there which are read-only and thus
will only consume a cacheline which would never need to be written
back to main memory due to modification.

If you allow other things to seep into that cache line, you totally
obliterate what I was trying to accomplish.

I don't think there's a way of doing that short of using asm?

You really don't understand what I'm trying to accomplish.

I want alignment on cache line boundary, and I don't want anything
else in that cacheline.

Franks a lot,
David S. Miller
[email protected]

2003-01-15 08:15:52

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

In message <[email protected]> you write:
> From: Rusty Russell <[email protected]>
> Date: Tue, 14 Jan 2003 12:10:12 +1100
>
> Hmm, you really want to weakly align it: you don't care if something follows it on
> the cacheline, (ie. don't make it into an array, but it'd be nice if other
> things could share the cacheline) in UP.
>
> No, that is an incorrect statement.
>
> I want the rest of the cacheline to be absent of any write-possible
> data. There are many members in there which are read-only and thus
> will only consume a cacheline which would never need to be written
> back to main memory due to modification.

But it's not quite that simple, either. If we say dirty cachelines
cost twice as much as read-only ones (ie. read + write vs. read +
discard), it gives some guide. In particular, if a structure has
parts:
struct foo {
readonly R;
writeable W;
};

And it normally fits in one cacheline, but you set the alignment of W
to a cacheline, now it fits in two, you've lost. (Note, struct
tcp_hashinfo is not such a structure, this is just talking to the
gallery).

> You really don't understand what I'm trying to accomplish.

No. Thanks for the explanation.

> I want alignment on cache line boundary, and I don't want anything
> else in that cacheline.

A "read-mostly" section might be appropriate, then. Of course, you'd
have to split the structure, in that case, and it's not worth it if
there are only a few of these.

Have I finally got it through my thick skull now?
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-01-16 07:22:08

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

From: Rusty Russell <[email protected]>
Date: Wed, 15 Jan 2003 19:02:20 +1100

> I want alignment on cache line boundary, and I don't want anything
> else in that cacheline.

A "read-mostly" section might be appropriate, then. Of course, you'd
have to split the structure, in that case, and it's not worth it if
there are only a few of these.

Have I finally got it through my thick skull now?

I think so. A read-mostly section would allow us to exploit this
more for other things.

BTW, the tcp_hashinfo struct exists only because the linker could
otherwise legally reorder data section members.

2003-01-21 14:02:39

by Bill Davidsen

[permalink] [raw]
Subject: Re: [PATCH] __cacheline_aligned_in_smp?

On Mon, 13 Jan 2003, David S. Miller wrote:

> From: Rusty Russell <[email protected]>
> Date: Tue, 14 Jan 2003 12:10:12 +1100
>
> Hmm, you really want to weakly align it: you don't care if something follows it on
> the cacheline, (ie. don't make it into an array, but it'd be nice if other
> things could share the cacheline) in UP.
>
> No, that is an incorrect statement.
>
> I want the rest of the cacheline to be absent of any write-possible
> data. There are many members in there which are read-only and thus
> will only consume a cacheline which would never need to be written
> back to main memory due to modification.
>
> If you allow other things to seep into that cache line, you totally
> obliterate what I was trying to accomplish.

Am I missing something here? If you have ro and rw data in a cache line:
1r 0w if you don't modify the data
1r 1w if you do
if you have ro and rw in separate cache lines:
2r 0w if you don't modify the data
2r 1w if you do

It would seem that you always have at least one read, and if you modify
the data at least one write, wherein is the saving?

Note: I am not disagreeing with you, I just can't follow how this is a
win in any case.

--
bill davidsen <[email protected]>
CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.