2001-03-01 17:26:36

by Manfred Spraul

[permalink] [raw]
Subject: Q: explicit alignment control for the slab allocator

--- 2.4/mm/slab.c Wed Feb 28 14:05:52 2001
+++ build-2.4/mm/slab.c Thu Mar 1 17:45:11 2001
@@ -207,6 +207,7 @@
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
unsigned int colour_next; /* cache colouring */
+ size_t slab_size;
kmem_cache_t *slabp_cache;
unsigned int growing;
unsigned int dflags; /* dynamic flags */
@@ -356,7 +357,7 @@
objsize: sizeof(kmem_cache_t),
flags: SLAB_NO_REAP,
spinlock: SPIN_LOCK_UNLOCKED,
- colour_off: L1_CACHE_BYTES,
+ colour_off: SMP_CACHE_BYTES,
name: "kmem_cache",
};

@@ -379,8 +380,13 @@
static void enable_all_cpucaches (void);
#endif

+static size_t aligned_size(size_t x, size_t alignment)
+{
+ return (x+alignment-1)&(~(alignment-1));
+}
+
/* Cal the num objs, wastage, and bytes left over for a given slab size. */
-static void kmem_cache_estimate (unsigned long gfporder, size_t size,
+static void kmem_cache_estimate (unsigned long gfporder, size_t size, size_t align,
int flags, size_t *left_over, unsigned int *num)
{
int i;
@@ -393,7 +399,7 @@
extra = sizeof(kmem_bufctl_t);
}
i = 0;
- while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
+ while (i*size + aligned_size(base+i*extra, align) <= wastage)
i++;
if (i > 0)
i--;
@@ -403,7 +409,7 @@

*num = i;
wastage -= i*size;
- wastage -= L1_CACHE_ALIGN(base+i*extra);
+ wastage -= aligned_size(base+i*extra, align);
*left_over = wastage;
}

@@ -415,13 +421,15 @@
init_MUTEX(&cache_chain_sem);
INIT_LIST_HEAD(&cache_chain);

- kmem_cache_estimate(0, cache_cache.objsize, 0,
+ kmem_cache_estimate(0, cache_cache.objsize, SMP_CACHE_BYTES, 0,
&left_over, &cache_cache.num);
if (!cache_cache.num)
BUG();

cache_cache.colour = left_over/cache_cache.colour_off;
cache_cache.colour_next = 0;
+ cache_cache.slab_size = aligned_size(sizeof(slab_t)
+ + cache_cache.num*sizeof(kmem_bufctl_t), SMP_CACHE_BYTES);
}


@@ -589,7 +597,7 @@
* kmem_cache_create - Create a cache.
* @name: A string which is used in /proc/slabinfo to identify this cache.
* @size: The size of objects to be created in this cache.
- * @offset: The offset to use within the page.
+ * @align: The required alignment for the objects.
* @flags: SLAB flags
* @ctor: A constructor for the objects.
* @dtor: A destructor for the objects.
@@ -614,12 +622,12 @@
* as davem.
*/
kmem_cache_t *
-kmem_cache_create (const char *name, size_t size, size_t offset,
+kmem_cache_create (const char *name, size_t size, size_t align,
unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
void (*dtor)(void*, kmem_cache_t *, unsigned long))
{
const char *func_nm = KERN_ERR "kmem_create: ";
- size_t left_over, align, slab_size;
+ size_t left_over, slab_size;
kmem_cache_t *cachep = NULL;

/*
@@ -631,7 +639,7 @@
(size < BYTES_PER_WORD) ||
(size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
(dtor && !ctor) ||
- (offset < 0 || offset > size))
+ (align < 0))
BUG();

#if DEBUG
@@ -647,7 +655,7 @@
flags &= ~SLAB_POISON;
}
#if FORCED_DEBUG
- if (size < (PAGE_SIZE>>3))
+ if (size < (PAGE_SIZE>>3) && (align <= BYTES_PER_WORD))
/*
* do not red zone large object, causes severe
* fragmentation.
@@ -680,38 +688,42 @@
size &= ~(BYTES_PER_WORD-1);
printk("%sForcing size word alignment - %s\n", func_nm, name);
}
-
-#if DEBUG
- if (flags & SLAB_RED_ZONE) {
- /*
- * There is no point trying to honour cache alignment
- * when redzoning.
+
+ if (align < BYTES_PER_WORD)
+ align = BYTES_PER_WORD;
+
+ /*
+ * There is no point trying to honour cache alignment
+ * when redzoning.
+ */
+ if ((flags & SLAB_HWCACHE_ALIGN) && !(flags & SLAB_RED_ZONE)) {
+ int autoalign = SMP_CACHE_BYTES;
+ /* HWCACHE_ALIGN is only a hint, squeeze multiple objects
+ * into one cache line if they fit.
+ * Otherwise we would 128 byte align the 32 byte kmalloc
+ * block on a P IV...
*/
- flags &= ~SLAB_HWCACHE_ALIGN;
- size += 2*BYTES_PER_WORD; /* words for redzone */
+ while (size < autoalign/2)
+ autoalign /= 2;
+ if (autoalign > align)
+ align = autoalign;
}
+
+#if DEBUG
+ if (flags & SLAB_RED_ZONE)
+ size += 2*BYTES_PER_WORD; /* words for redzone */
#endif
- align = BYTES_PER_WORD;
- if (flags & SLAB_HWCACHE_ALIGN)
- align = L1_CACHE_BYTES;
+
+ /* Need to adjust size so that objs are cache aligned. */
+ size = aligned_size(size, align);

/* Determine if the slab management is 'on' or 'off' slab. */
- if (size >= (PAGE_SIZE>>3))
+ if (size >= (PAGE_SIZE>>3) || align >= (PAGE_SIZE>>3))
/*
* Size is large, assume best to place the slab management obj
* off-slab (should allow better packing of objs).
*/
flags |= CFLGS_OFF_SLAB;
-
- if (flags & SLAB_HWCACHE_ALIGN) {
- /* Need to adjust size so that objs are cache aligned. */
- /* Small obj size, can get at least two per cache line. */
- /* FIXME: only power of 2 supported, was better */
- while (size < align/2)
- align /= 2;
- size = (size+align-1)&(~(align-1));
- }
-
/* Cal size (in pages) of slabs, and the num of objs per slab.
* This could be made much more intelligent. For now, try to avoid
* using high page-orders for slabs. When the gfp() funcs are more
@@ -720,7 +732,7 @@
do {
unsigned int break_flag = 0;
cal_wastage:
- kmem_cache_estimate(cachep->gfporder, size, flags,
+ kmem_cache_estimate(cachep->gfporder, size, align, flags,
&left_over, &cachep->num);
if (break_flag)
break;
@@ -754,24 +766,24 @@
cachep = NULL;
goto opps;
}
- slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t));
-
/*
* If the slab has been placed off-slab, and we have enough space then
* move it on-slab. This is at the expense of any extra colouring.
*/
- if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+ slab_size = aligned_size(cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t), align);
+
+ if ((flags & CFLGS_OFF_SLAB) && left_over >= slab_size) {
flags &= ~CFLGS_OFF_SLAB;
left_over -= slab_size;
}
+ if (flags & CFLGS_OFF_SLAB) {
+ /* really off slab. No need for manual alignment */
+ slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(slab_t);
+ }

- /* Offset must be a multiple of the alignment. */
- offset += (align-1);
- offset &= ~(align-1);
- if (!offset)
- offset = L1_CACHE_BYTES;
- cachep->colour_off = offset;
- cachep->colour = left_over/offset;
+ cachep->slab_size = slab_size;
+ cachep->colour_off = align;
+ cachep->colour = left_over/align;

/* init remaining fields */
if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
@@ -1016,8 +1028,7 @@
* if you enable OPTIMIZE
*/
slabp = objp+colour_off;
- colour_off += L1_CACHE_ALIGN(cachep->num *
- sizeof(kmem_bufctl_t) + sizeof(slab_t));
+ colour_off += cachep->slab_size;
}
slabp->inuse = 0;
slabp->colouroff = colour_off;


Attachments:
patch-slab-align (6.71 kB)

2001-03-01 19:26:01

by David Miller

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator


Manfred, why are you changing the cache alignment to
SMP_CACHE_BYTES? If you read the original SLAB papers
and other documents, the code intends to color the L1
cache not the L2 or subsidiary caches.

Later,
David S. Miller
[email protected]

2001-03-01 19:31:01

by Mark Hemment

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

On Thu, 1 Mar 2001, Manfred Spraul wrote:

> Alan added a CONFIG options for FORCED_DEBUG slab debugging, but there
> is one minor problem with FORCED_DEBUG: FORCED_DEBUG disables
> HW_CACHEALIGN, and several drivers assume that HW_CACHEALIGN implies a
> certain alignment (iirc usb/uhci.c assumes 16-byte alignment)
>
> I've attached a patch that fixes the explicit alignment control in
> kmem_cache_create().
>
> The parameter 'offset' [the minimum offset to be used for cache
> coloring] actually is the guaranteed alignment, except that the
> implementation was broken. I've fixed the implementation and renamed
> 'offset' to 'align'.

As the original author of the slab allocator, I can assure you there is
nothing guaranteed about "offset". Neither is it to do with any minimum.

The original idea behind offset was for objects with a "hot" area
greater than a single L1 cache line. By using offset correctly (and to my
knowledge it has never been used anywhere in the Linux kernel), a SLAB
cache creator (caller of kmem_cache_create()) could ask the SLAB for more
than one colour (space/L1 cache lines) offset between objects.

As no one uses the feature it could well be broken, but is that a reason
to change its meaning?

Mark

2001-03-01 19:47:03

by Manfred Spraul

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

"David S. Miller" wrote:
>
> Manfred, why are you changing the cache alignment to
> SMP_CACHE_BYTES? If you read the original SLAB papers
> and other documents, the code intends to color the L1
> cache not the L2 or subsidiary caches.
>
I'll undo that change.

I only found this comment in the source file:

> /* For performance, all the general caches are L1 aligned.
> * This should be particularly beneficial on SMP boxes, as it
> * eliminates "false sharing".
> * Note for systems short on memory removing the alignment will
> * allow tighter packing of the smaller caches. */

To avoid false sharing we would need SMP_CACHE_BYTES aligning, not L1
aligning.

--
Manfred

2001-03-01 19:55:43

by Manfred Spraul

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

Mark Hemment wrote:
>
> The original idea behind offset was for objects with a "hot" area
> greater than a single L1 cache line. By using offset correctly (and to my
> knowledge it has never been used anywhere in the Linux kernel), a SLAB
> cache creator (caller of kmem_cache_create()) could ask the SLAB for more
> than one colour (space/L1 cache lines) offset between objects.
>

What's the difference between this definition of 'offset' and alignment?

alignment means that (addr%alignment==0)
offset means that (addr1-addr2 == n*offset)

Isn't the only difference the alignment of the first object in a slab?

> As no one uses the feature it could well be broken, but is that a reason
> to change its meaning?
>

Some hardware drivers use HW_CACHEALIGN and assume certain byte
alignments, and arm needs 1024 byte aligned blocks.

--
Manfred

2001-03-01 20:23:06

by Mark Hemment

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

On Thu, 1 Mar 2001, Manfred Spraul wrote:

> Mark Hemment wrote:
> >
> > The original idea behind offset was for objects with a "hot" area
> > greater than a single L1 cache line. By using offset correctly (and to my
> > knowledge it has never been used anywhere in the Linux kernel), a SLAB
> > cache creator (caller of kmem_cache_create()) could ask the SLAB for more
> > than one colour (space/L1 cache lines) offset between objects.
> >
>
> What's the difference between this definition of 'offset' and alignment?

The positioning of the first object within a slab (at least that is how
it is suppose to work).

The distance between all objects within a slab is constant, so the
colouring of objects depends upon the cache line (offset) the first object
is placed on.
The alignment is the boundary objects fall upon within a slab. This may
require 'padding' between the objects so they fall on the correct
boundaries (ie. they aren't a 'natural' size).
For kmem_cache_create(), a zero offset means the offset is the same as
the alignment.

Take the case of offset being 64, and alignment being 32.
Here, the allocator attempts to place the first object on a 64byte
boundary (say, at offset 0), and all subsequent objects (within the same
cache) on a 32byte boundary.
Now, when it comes to construct the next slab, it tries to place the
first object 64bytes offset from the first object in the previous
slab (say, at offset 64). The distance between the objects is still the
same - ie. they fall on 32byte boundaries.

See the difference?

> alignment means that (addr%alignment==0)
> offset means that (addr1-addr2 == n*offset)
>
> Isn't the only difference the alignment of the first object in a slab?

Yes (as explained above). It is important.

> Some hardware drivers use HW_CACHEALIGN and assume certain byte
> alignments, and arm needs 1024 byte aligned blocks.

I should have put a big comment in the allocator, saying aligment/offset
are only hints to the allocator and not guarantees.
Unfortunately, the allocator was always returning L1 aligned objects
with HW_CACHEALIGN, so folks started to depend on it. Too late to break
that now.
It sounds as if HW_CACHEALIGN has been broken by a config option, and
this needs to be fixed.
But leave 'offset' alone?! If it isn't working as described above, then
it needs fixing, but don't change its definition.

Mark

2001-03-01 21:55:55

by Manfred Spraul

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

Mark Hemment wrote:
>
> On Thu, 1 Mar 2001, Manfred Spraul wrote:
>
> > Mark Hemment wrote:
> > >
> > > The original idea behind offset was for objects with a "hot" area
> > > greater than a single L1 cache line. By using offset correctly (and to my
> > > knowledge it has never been used anywhere in the Linux kernel), a SLAB
> > > cache creator (caller of kmem_cache_create()) could ask the SLAB for more
> > > than one colour (space/L1 cache lines) offset between objects.
> > >
> >
> > What's the difference between this definition of 'offset' and alignment?
>
> The positioning of the first object within a slab (at least that is how
> it is suppose to work).
>
> The distance between all objects within a slab is constant, so the
> colouring of objects depends upon the cache line (offset) the first object
> is placed on.
> The alignment is the boundary objects fall upon within a slab. This may
> require 'padding' between the objects so they fall on the correct
> boundaries (ie. they aren't a 'natural' size).
> For kmem_cache_create(), a zero offset means the offset is the same as
> the alignment.
>
> Take the case of offset being 64, and alignment being 32.
> Here, the allocator attempts to place the first object on a 64byte
> boundary (say, at offset 0), and all subsequent objects (within the same
> cache) on a 32byte boundary.
> Now, when it comes to construct the next slab, it tries to place the
> first object 64bytes offset from the first object in the previous
> slab (say, at offset 64). The distance between the objects is still the
> same - ie. they fall on 32byte boundaries.
>
> See the difference?
>

Yes, I see the difference, but I'm not sure that it will work as
intended.
offset must be a multiple of the alignment, everything else won't work.

In which cases an offset > alignment is really a win?

Obviously using offset 32 bytes for a structure with a 64 byte hot zone
means that 2 slabs with a different "color" compete for the same cache
lines [just assuming 32 byte cache lines for simplicity] in 50% of the
cases, but otoh offset==64 halves the number of possible colors.

--
Manfred

2001-03-02 10:53:04

by Mark Hemment

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator


On Thu, 1 Mar 2001, Manfred Spraul wrote:
> Yes, I see the difference, but I'm not sure that it will work as
> intended.
> offset must be a multiple of the alignment, everything else won't work.

The code does force the offset to be a multiple of the alignment -
rounding the offset up. The idea was that a caller could do something like:
kmem_cache_create("foo", sizeof(foo_s),
offsetof(foo_s, member), ....);

where structure members in foo_s are "hot" up until the 'member'
structure.

> In which cases an offset > alignment is really a win?

You've got me. :) I don't know.
In the Bonwick paper, such a facility was described, so I thought "hey,
sounds like that might be useful".
Could be a win on archs with small L1 cache line sizes (16bytes on a
486) - but most modern processors have larger lines.
Hmm, on that note, have you seen the L1 line size defined for a Pentium IV?
128 bytes!! (CONFIG_X86_L1_CACHE_SHIFT of 7). That is probably going to
waste a lot of space for small objects.


> Obviously using offset 32 bytes for a structure with a 64 byte hot zone
> means that 2 slabs with a different "color" compete for the same cache
> lines [just assuming 32 byte cache lines for simplicity] in 50% of the
> cases, but otoh offset==64 halfs the number of possible colors.

Yes.
It is possibly to improve on the current slab allocator, to get an
extra colour or two out of it for some object sizes (eg. when the slab
management is on slab, it is only ever at the front of a slab - it could
also wrap round to the rear).

Mark

2001-03-02 11:47:15

by Manfred Spraul

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

Zitiere Mark Hemment <[email protected]>:

>
>
> > In which cases an offset > alignment is really a win?
>
> You've got me. :) I don't know.
> In the Bonwick paper, such a facility was described, so I thought
> "hey,
> sounds like that might be useful".
> Could be a win on archs with small L1 cache line sizes (16bytes on a
> 486) - but most modern processors have larger lines.

IIRC cache colouring was introduced for some sun hardware with 2 memory busses:
one handles (addr%256<128), the other one (addr%256>=128)
one handes (addr%256<128), the other one (addr%256>=128)

If everything is perfectly aligned, the load on one bus was far higher than the
load on the other bus.

Probably there are similar problems when walking linked lists if the L1 cache
has a low associativity.


> Hmm, no that note, seen the L1 line size defined for a Pentium IIII?
> 128 bytes!! (CONFIG_X86_L1_CACHE_SHIFT of 7). That is probably going to
> waste a lot of space for small objects.
>
No, it doesn't:
HWCACHE_ALIGN means "do not cross a cache line boundary".
Thus all power-of-2 objects are unaffected.
It does waste a lot of space for 129 byte objects with HWCACHE_ALIGN set: they are
rounded up to 256 bytes.
But that isn't new: 68 byte buffer heads on Athlon kernels were rounded to 128
bytes.
[btw, 128 bytes is the L2 cache line size. The L1 cache line size is 64 bytes]

>
> > Obviously using offset 32 bytes for a structure with a 64 byte hot
> zone
> > means that 2 slabs with a different "color" compete for the same cache
> > lines [just assuming 32 byte cache lines for simplicity] in 50% of the
> > cases, but otoh offset==64 halfs the number of possible colors.
>
> Yes.
> It is possibly to improve on the current slab allocator, to get an
> extra colour or two out of it for some object sizes (eg. when the slab
> management is on slab, it is only ever at the front of a slab - it could
> also wrap round to the rear).
>
That's another issue.

The question is who should decide about the cache colour offset?

a) the slab allocator always chooses the smallest sensible offset (i.e. the
alignment)
b) the caller can specify the offset, e.g. if the caller knows that the hot zone
is large he would use a larger colour offset.

Even if the hot zone is larger than the default offset, is there any advantage
of increasing the colour offset beyond the alignment?

I don't see an advantage.

--
Manfred

2001-03-02 12:32:30

by Mark Hemment

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

On Fri, 2 Mar 2001, Manfred Spraul wrote:
> Zitiere Mark Hemment <[email protected]>:
> > Could be a win on archs with small L1 cache line sizes (16bytes on a
> > 486) - but most modern processors have larger lines.
>
> IIRC cache colouring was introduced for some sun hardware with 2 memory busses:
> one handes (addr%256<128), the other one (addr%256>=128)
>
> If everything is perfectly aligned, the load one one bus was far higher than the
> load on the other bus.

Yes.
High-end Intel PCs have also had interleaved buses for a few years
now. So it is not just for Sun h/w.


> > Hmm, no that note, seen the L1 line size defined for a Pentium IIII?
> > 128 bytes!! (CONFIG_X86_L1_CACHE_SHIFT of 7). That is probably going to
> > waste a lot of space for small objects.
> >
> No, it doesn't:
> HWCACHE_ALIGN means "do not cross a cache line boundary".

Ah, I broke my code!!!!! :(

In my original slab, the code to do "packing" of objects into a single
cache line was #if-def'ed out for SMP to avoid the possibility of
false-sharing between objects. Not a large possibility, but it exists.

> The question is who should decide about the cache colour offset?
>
> a) the slab allocator always chooses the smallest sensible offset (i.e. the
> alignment)
> b) the caller can specify the offset, e.g. if the caller knows that the hot zone
> is large he would use a larger colour offset.

Only the caller knows about the attributes and usage of an object, so
they should be able to request (not demand) the offset/alignment of the
allocator. (OK, they can demand the alignment.)

> Even if the hot zone is larger than the default offset, is there any advantage
> of increasing the colour offset beyond the alignment?
>
> I don't see an advantage.

I do, but like you, I don't have any data to prove my point.
Time to get profiling?

Mark


2001-03-02 13:22:13

by Manfred Spraul

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

Mark Hemment wrote:
>
> > > Hmm, no that note, seen the L1 line size defined for a Pentium IIII?
> > > 128 bytes!! (CONFIG_X86_L1_CACHE_SHIFT of 7). That is probably going to
> > > waste a lot of space for small objects.
> > >
> > No, it doesn't:
> > HWCACHE_ALIGN means "do not cross a cache line boundary".
>
> Ah, I broke my code!!!!! :(
>
> In my original slab, the code to do "packing" of objects into a single
> cache line was #if-def'ed out for SMP to avoid the possibility of
> false-sharing between objects. Not a large possibility, but it exists.
>
But then you need SMP_CACHE_BYTES, not L1_CACHE_BYTES.
And 128 byte aligning the 32-byte kmalloc cache wastes too much memory
;-)

If the caller of kmem_cache_create really wants to avoid false sharing
he could set align to SMP_CACHE_BYTES. (e.g. for some per-cpu data
structures)

> > Even if the hot zone is larger than the default offset, is there any advantage
> > of increasing the colour offset beyond the alignment?
> >
> > I don't see an advantage.
>
> I do, but like you, I don't have any data to prove my point.
> Time to get profiling?
>

How? You've already noticed that noone in the linux kernel uses offset.

--
Manfred

2001-03-07 20:03:07

by Jes Sorensen

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

>>>>> "Manfred" == Manfred Spraul <[email protected]> writes:

Manfred> Mark Hemment wrote:
>> As no one uses the feature it could well be broken, but is that a
>> reason to change its meaning?

Manfred> Some hardware drivers use HW_CACHEALIGN and assume certain
Manfred> byte alignments, and arm needs 1024 byte aligned blocks.

Isn't that just a reinvention of SMP_CACHE_BYTES? Or are there
actually machines out there where the between-CPU cache line size
differs from the CPU-to-DMA-controller cache line size?

Jes

2001-03-07 20:37:15

by Manfred Spraul

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

From: "Jes Sorensen" <[email protected]>
> >>>>> "Manfred" == Manfred Spraul <[email protected]> writes:
>
> Manfred> Mark Hemment wrote:
> >> As no one uses the feature it could well be broken, but is that a
> >> reason to change its meaning?
>
> Manfred> Some hardware drivers use HW_CACHEALIGN and assume certain
> Manfred> byte alignments, and arm needs 1024 byte aligned blocks.
>
> Isn't that just a reinvention of SMP_CACHE_BYTES? Or are there
> actually machines out there where the inbetween CPU cache line size
> differs from the between CPU and DMA controller cache line size?
>
No.

First of all HW_CACHEALIGN aligns to the L1 cache, not SMP_CACHE_BYTES.
Additionally you sometimes need a guaranteed alignment for other
problems, afaik ARM needs 1024 bytes for some structures due to cpu
restrictions, and several usb controllers need 16 byte alignment.

And some callers of kmem_cache_create() want SMP_CACHE_BYTES alignment,
other callers (and DaveM) expect L1_CACHE_BYTES alignment.

It's more a API clarification than a real change.

I think it can wait until 2.5:
drivers should use pci_alloc_consistent_pool(), not
kmalloc_aligned()+virt_to_bus(), arm can wait and the ability to choose
between SMP and L1 alignment is not that important.

--
Manfred

2001-03-08 17:31:15

by Jes Sorensen

[permalink] [raw]
Subject: Re: Q: explicit alignment control for the slab allocator

>>>>> "Manfred" == Manfred Spraul <[email protected]> writes:

Manfred> First of all HW_CACHEALIGN aligns to the L1 cache, not
Manfred> SMP_CACHE_BYTES. Additionally you sometimes need a
Manfred> guaranteed alignment for other problems, afaik ARM needs 1024
Manfred> bytes for some structures due to cpu restrictions, and
Manfred> several usb controllers need 16 byte alignment.

My question is whats the point in asking for L1_CACHE_BYTES alignment
for hardware reasons when you can't see it beyond the cache controller
anyway? Sure it makes sense for data structures only used by the CPU,
but for structures that are shared between CPUs or goes to DMA
controllers it seems to make little sense.

Jes