Hi Linus,
This patch moves task_struct allocation from kernel/fork.c into
arch/*/kernel/process.c.
David Mosberger wrote:
> David.H> What might be worth doing is to move the task_struct slab
> David.H> cache and (de-)allocator out of fork.c and to stick it in
> David.H> the arch somewhere. Then archs aren't bound to have the two
> David.H> separate. So for a system that can handle lots of memory,
> David.H> you can allocate the thread_info, task_struct and
> David.H> supervisor stack all on one very large chunk if you so
> David.H> wish.
>
> Could you do this? I'd prefer if task_info could be completely hidden
> inside the x86/sparc arch-specific code, but if things are set up such
> that we at least have the option to keep the stack, task_info, and
> task_struct in a single chunk of memory (and without pointers between
> them), I'd have much less of an issue with it.
David
diff -uNr linux-2.5.5-pre1/arch/i386/kernel/process.c linux-tib-255p1/arch/i386/kernel/process.c
--- linux-2.5.5-pre1/arch/i386/kernel/process.c Thu Feb 14 14:31:13 2002
+++ linux-tib-255p1/arch/i386/kernel/process.c Thu Feb 14 14:32:35 2002
@@ -54,6 +54,8 @@
int hlt_counter;
+static kmem_cache_t *task_struct_cachep;
+
/*
* Return saved PC of a blocked thread.
*/
@@ -522,8 +524,49 @@
}
/*
- * Free current thread data structures etc..
+ * allocate and free task and thread data structures etc..
*/
+void __init init_task_struct_cache(void)
+{
+ /* create a slab on which task_structs can be allocated */
+ task_struct_cachep =
+ kmem_cache_create("task_struct",
+ sizeof(struct task_struct),0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!task_struct_cachep)
+ panic("fork_init(): cannot create task_struct SLAB cache");
+
+}
+
+struct task_struct *dup_task_struct(struct task_struct *orig)
+{
+ struct task_struct *tsk;
+ struct thread_info *ti;
+
+ ti = alloc_thread_info();
+ if (!ti) return NULL;
+
+ tsk = kmem_cache_alloc(task_struct_cachep,GFP_ATOMIC);
+ if (!tsk) {
+ free_thread_info(ti);
+ return NULL;
+ }
+
+ *ti = *orig->thread_info;
+ *tsk = *orig;
+ tsk->thread_info = ti;
+ ti->task = tsk;
+ atomic_set(&tsk->usage,1);
+
+ return tsk;
+}
+
+void __put_task_struct(struct task_struct *tsk)
+{
+ free_thread_info(tsk->thread_info);
+ kmem_cache_free(task_struct_cachep,tsk);
+}
+
void exit_thread(void)
{
/* nothing to do ... */
diff -uNr linux-2.5.5-pre1/arch/sparc64/kernel/process.c linux-tib-255p1/arch/sparc64/kernel/process.c
--- linux-2.5.5-pre1/arch/sparc64/kernel/process.c Thu Feb 14 14:04:42 2002
+++ linux-tib-255p1/arch/sparc64/kernel/process.c Thu Feb 14 14:21:26 2002
@@ -40,6 +40,8 @@
#include <asm/elf.h>
#include <asm/fpumacro.h>
+static kmem_cache_t *task_struct_cachep;
+
/* #define VERBOSE_SHOWREGS */
#ifndef CONFIG_SMP
@@ -398,7 +400,50 @@
return ret;
}
-/* Free current thread data structures etc.. */
+/*
+ * allocate and free task and thread data structures etc..
+ */
+void __init init_task_struct_cache(void)
+{
+ /* create a slab on which task_structs can be allocated */
+ task_struct_cachep =
+ kmem_cache_create("task_struct",
+ sizeof(struct task_struct),0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+ if (!task_struct_cachep)
+ panic("fork_init(): cannot create task_struct SLAB cache");
+
+}
+
+struct task_struct *dup_task_struct(struct task_struct *orig)
+{
+ struct task_struct *tsk;
+ struct thread_info *ti;
+
+ ti = alloc_thread_info();
+ if (!ti) return NULL;
+
+ tsk = kmem_cache_alloc(task_struct_cachep,GFP_ATOMIC);
+ if (!tsk) {
+ free_thread_info(ti);
+ return NULL;
+ }
+
+ *ti = *orig->thread_info;
+ *tsk = *orig;
+ tsk->thread_info = ti;
+ ti->task = tsk;
+ atomic_set(&tsk->usage,1);
+
+ return tsk;
+}
+
+void __put_task_struct(struct task_struct *tsk)
+{
+ free_thread_info(tsk->thread_info);
+ kmem_cache_free(task_struct_cachep,tsk);
+}
+
void exit_thread(void)
{
struct thread_info *t = current_thread_info();
diff -uNr linux-2.5.5-pre1/include/asm-i386/processor.h linux-tib-255p1/include/asm-i386/processor.h
--- linux-2.5.5-pre1/include/asm-i386/processor.h Thu Feb 14 14:31:18 2002
+++ linux-tib-255p1/include/asm-i386/processor.h Thu Feb 14 14:38:55 2002
@@ -425,6 +425,11 @@
struct task_struct;
struct mm_struct;
+/* task structure allocation */
+extern void init_task_struct_cache(void);
+extern struct task_struct *dup_task_struct(struct task_struct *orig);
+extern void __put_task_struct(struct task_struct *tsk);
+
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
/*
diff -uNr linux-2.5.5-pre1/include/asm-sparc64/processor.h linux-tib-255p1/include/asm-sparc64/processor.h
--- linux-2.5.5-pre1/include/asm-sparc64/processor.h Thu Feb 14 14:03:47 2002
+++ linux-tib-255p1/include/asm-sparc64/processor.h Thu Feb 14 14:34:38 2002
@@ -175,6 +175,11 @@
#define copy_segments(tsk, mm) do { } while (0)
#define release_segments(mm) do { } while (0)
+/* task structure allocation */
+extern void init_task_struct_cache(void);
+extern struct task_struct *dup_task_struct(struct task_struct *orig);
+extern void __put_task_struct(struct task_struct *tsk);
+
#define get_wchan(__TSK) \
({ extern void scheduling_functions_start_here(void); \
extern void scheduling_functions_end_here(void); \
diff -uNr linux-2.5.5-pre1/kernel/fork.c linux-tib-255p1/kernel/fork.c
--- linux-2.5.5-pre1/kernel/fork.c Thu Feb 14 14:31:19 2002
+++ linux-tib-255p1/kernel/fork.c Thu Feb 14 14:32:40 2002
@@ -30,8 +30,6 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
-static kmem_cache_t *task_struct_cachep;
-
/* The idle threads do not count.. */
int nr_threads;
@@ -74,13 +72,7 @@
void __init fork_init(unsigned long mempages)
{
- /* create a slab on which task_structs can be allocated */
- task_struct_cachep =
- kmem_cache_create("task_struct",
- sizeof(struct task_struct),0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
- if (!task_struct_cachep)
- panic("fork_init(): cannot create task_struct SLAB cache");
+ init_task_struct_cache();
/*
* The default maximum number of threads is set to a safe
@@ -93,35 +85,6 @@
init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
}
-struct task_struct *dup_task_struct(struct task_struct *orig)
-{
- struct task_struct *tsk;
- struct thread_info *ti;
-
- ti = alloc_thread_info();
- if (!ti) return NULL;
-
- tsk = kmem_cache_alloc(task_struct_cachep,GFP_ATOMIC);
- if (!tsk) {
- free_thread_info(ti);
- return NULL;
- }
-
- *ti = *orig->thread_info;
- *tsk = *orig;
- tsk->thread_info = ti;
- ti->task = tsk;
- atomic_set(&tsk->usage,1);
-
- return tsk;
-}
-
-void __put_task_struct(struct task_struct *tsk)
-{
- free_thread_info(tsk->thread_info);
- kmem_cache_free(task_struct_cachep,tsk);
-}
-
/* Protects next_safe and last_pid. */
spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
David Howells wrote:
>
> Hi Linus,
>
> This patch moves task_struct allocation from kernel/fork.c into
> arch/*/kernel/process.c.
>
> David Mosberger wrote:
>
> > David.H> What might be worth doing is to move the task_struct slab
> > David.H> cache and (de-)allocator out of fork.c and to stick it in
> > David.H> the arch somewhere. Then archs aren't bound to have the two
> > David.H> separate. So for a system that can handle lots of memory,
> > David.H> you can allocate the thread_info, task_struct and
> > David.H> supervisor stack all on one very large chunk if you so
> > David.H> wish.
> >
> > Could you do this? I'd prefer if task_info could be completely hidden
> > inside the x86/sparc arch-specific code, but if things are set up such
> > that we at least have the option to keep the stack, task_info, and
> > task_struct in a single chunk of memory (and without pointers between
> > them), I'd have much less of an issue with it.
Is this the first in a multi-step patch series, or something like that?
You just duplicated code in a generic location and pasted it into the
arch. Where's the gain in that? I do see the gain in letting the arch
allocate the task struct, but surely your patch should provide a generic
mechanism for an arch to call by default, instead of duplicating code??
Jeff
--
Jeff Garzik | "I went through my candy like hot oatmeal
Building 1024 | through an internally-buttered weasel."
MandrakeSoft | - goats.com
> Is this the first in a multi-step patch series, or something like that?
What makes you ask that?
> You just duplicated code in a generic location and pasted it into the
> arch. Where's the gain in that? I do see the gain in letting the arch
> allocate the task struct, but surely your patch should provide a generic
> mechanism for an arch to call by default, instead of duplicating code??
Hmmm... Is it worth going through all fun of creating another CONFIG_xxxx
option to govern the inclusion of such code?
David
David Howells wrote:
> > Is this the first in a multi-step patch series, or something like that?
>
> What makes you ask that?
Because your patch just flat out duplicates code line for line into two
arches.
> > You just duplicated code in a generic location and pasted it into the
> > arch. Where's the gain in that? I do see the gain in letting the arch
> > allocate the task struct, but surely your patch should provide a generic
> > mechanism for an arch to call by default, instead of duplicating code??
>
> Hmmm... Is it worth going through all fun of creating another CONFIG_xxxx
> option to govern the inclusion of such code?
I am wondering where you want to go with this, short term and long
term. Is the implementation of this on other arches gonna look the same
-- just line for line copy of code? With maybe ia64 as the lone
exception?
Jeff
--
Jeff Garzik | "I went through my candy like hot oatmeal
Building 1024 | through an internally-buttered weasel."
MandrakeSoft | - goats.com
Jeff Garzik <[email protected]> wrote:
> David Howells wrote:
> > > Is this the first in a multi-step patch series, or something like that?
> >
> > What makes you ask that?
>
> Because your patch just flat out duplicates code line for line into two
> arches.
What I do to it next depends on the feedback I get back.
> I am wondering where you want to go with this, short term and long
> term. Is the implementation of this on other arches gonna look the same
> -- just line for line copy of code? With maybe ia64 as the lone
> exception?
Ask Linus, he asked for the task_struct/thread_info split. Various people have
complained about the two things being allocated separately (maintainers for
m68k and ia64 archs certainly, and if I remember rightly, x86_64 as well,
though I don't appear to have saved the message for that). However, DaveM
(sparc64) appears to really be in favour of it.
In any case, this is just a corollary to my main aim of getting task ornaments
included in the kernel. The userspace resumption notification was the true
prerequisite, to support which I adjusted entry.S to make sure it didn't cost
any extra clock cycles. This led to Linus requesting that everything that
entry.S needs to access be separated out into another structure (so that
entry.S never accesses task_struct).
David
Hi,
David Howells wrote:
> Ask Linus, he asked for the task_struct/thread_info split. Various people have
> complained about the two things being allocated separately (maintainers for
> m68k and ia64 archs certainly, and if I remember rightly, x86_64 as well,
> though I don't appear to have saved the message for that). However, DaveM
> (sparc64) appears to really be in favour of it.
Ok, so we have the following allocation possibilities.
1. allocate task_struct+thread_info+stack together.
2. allocate task_struct and thread_info+stack.
3. allocate task_struct+thread_info and stack.
1. is what we have so far and I hope Linus actually means a
task_struct/stack split. Linus, could you please clarify?
Architectures without a thread register want either the first or second
option. I don't really have an opinion, whether the first option should
be made obsolete. For architectures with a thread register the third
option makes most sense and is not really a problem, we only have to
specify the dependency between task_struct and thread_info.
bye, Roman
On Thu, Feb 14, 2002 at 05:03:14PM +0000, David Howells wrote:
> Ask Linus, he asked for the task_struct/thread_info split. Various people have
> complained about the two things being allocated separately (maintainers for
> m68k and ia64 archs certainly, and if I remember rightly, x86_64 as well,
> though I don't appear to have saved the message for that). However, DaveM
> (sparc64) appears to really be in favour of it.
I'm not especially in favour of it for Alpha either.
As far as I can see all it's done is added a memory load on the
fast path when a constant displacement folded into the address
would previously have sufficed.
r~
David Howells wrote:
>
> Jeff Garzik <[email protected]> wrote:
> > David Howells wrote:
> > > > Is this the first in a multi-step patch series, or something like that?
> > >
> > > What makes you ask that?
> >
> > Because your patch just flat out duplicates code line for line into two
> > arches.
>
> What I do to it next depends on the feedback I get back.
Here's my feedback, then ;-)
Your patch would be ok IMHO if you additionally changed the arches to
include task struct inside struct thread_info, getting things back down
to a single allocation for thread_info+task_struct, with 'current' once
again being a constant offset from the beginning of thread_info. [this
might require resurrecting the old patches to move task struct
definitions out of sched.h proper]
Basically, I think you are going in the right direction, but the
implementation detail of flat out copying code was what caught my
attention, and looks pretty bad to me. Two easy ways I can think of for
providing generic code would be #include <asm-generic/foo.h>, or
enabling generic code when feature macro HAVE_ARCH_FOO is not defined.
> > I am wondering where you want to go with this, short term and long
> > term. Is the implementation of this on other arches gonna look the same
> > -- just line for line copy of code? With maybe ia64 as the lone
> > exception?
>
> Ask Linus, he asked for the task_struct/thread_info split. Various people
[...]
> any extra clock cycles. This led to Linus requesting that everything that
> entry.S needs to access be separated out into another structure (so that
> entry.S never accesses task_struct).
Seems a sane enough direction...
Jeff
--
Jeff Garzik | "I went through my candy like hot oatmeal
Building 1024 | through an internally-buttered weasel."
MandrakeSoft | - goats.com
Jeff Garzik wrote:
> Your patch would be ok IMHO if you additionally changed the arches to
> include task struct inside struct thread_info, getting things back down
> to a single allocation for thread_info+task_struct, with 'current' once
> again being a constant offset from the beginning of thread_info. [this
> might require resurrecting the old patches to move task struct
> definitions out of sched.h proper]
Actually, you don't really need the definition of task_struct, if
include dependencies get really hairy... you really only need the size
of the task struct.
Jeff
--
Jeff Garzik | "I went through my candy like hot oatmeal
Building 1024 | through an internally-buttered weasel."
MandrakeSoft | - goats.com
Hi,
On Fri, 15 Feb 2002, Jeff Garzik wrote:
> > any extra clock cycles. This led to Linus requesting that everything that
> > entry.S needs to access be separated out into another structure (so that
> > entry.S never accesses task_struct).
>
> Seems a sane enough direction...
That wouldn't be a problem, if ia32 added the needed infrastructure to
calculate the structure offsets. It's not really that difficult, look at
the m68k implementation, which even gets dependencies right. AFAIK Keith
also added the needed infra structure to his new build system.
bye, Roman
> That wouldn't be a problem, if ia32 added the needed infrastructure to
> calculate the structure offsets. It's not really that difficult, look at
> the m68k implementation, which even gets dependencies right. AFAIK Keith
> also added the needed infra structure to his new build system.
But the offsets aren't fixed. The task structure does not lie adjacent to the
thread_info structure.
David
Hi,
On Fri, 15 Feb 2002, David Howells wrote:
> > That wouldn't be a problem, if ia32 added the needed infrastructure to
> > calculate the structure offsets.
>
> But the offsets aren't fixed. The task structure does not lie adjacent to the
> thread_info structure.
That's not the problem, I meant this sentence: "This led to Linus
requesting that everything that entry.S needs to access be separated out
into another structure." Splitting the task structure and the stack page
is fine. Keeping the most important fields in the stack page is fine too,
if the architecture requires it. But the decision what goes into
thread_info, should not be made only to avoid access to task_struct from
entry.S.
bye, Roman
Roman Zippel <[email protected]> wrote:
> > > That wouldn't be a problem, if ia32 added the needed infrastructure to
> > > calculate the structure offsets.
> >
> > But the offsets aren't fixed. The task structure does not lie adjacent to
> > the thread_info structure.
>
> That's not the problem, I meant this sentence: "This led to Linus
> requesting that everything that entry.S needs to access be separated out
> into another structure." Splitting the task structure and the stack page
> is fine. Keeping the most important fields in the stack page is fine too,
> if the architecture requires it.
I think I see what you meant.
However, I'm not particularly keen on the M68K offset generator. I'm not sure
why exactly, but it just doesn't seem the right way to do things... OTOH,
without making the assembler able to parse C structs, I'm not sure there is a
"right" way:-/
> But the decision what goes into thread_info, should not be made only to
> avoid access to task_struct from entry.S.
How about I quote Linus (who explained what he wanted better than I did).
Firstly, in response to me having supplied a patch that made a set of four
byte-size values as the status area in the task_struct:
| For the future, the biggest thing I'd like to see is actually to make
| "work" be a bitmap, because the "bytes are atomic" approach simply isn't
| portable anyway, so we might as well make things _explicitly_ atomic and
| use bit operations. Otherwise the alpha version of "work" would have to be
| four bytes per "bit" of information, which sounds really excessive.
And then after some discussion:
| In particular, there's been all that discussion about cache-coloring the
| "struct task_struct", and my personal suggestion for that whole can of
| worms is to have the "struct low_level" be in the one low cache-line, and
| make it contain a pointer to "struct task_struct" - and just split the two
| up completely. Then the low-level asm code would never have to even look
| at "task_struct", it would only look at this stuff.
(struct low_level became thread_info).
David
Hi,
On Fri, 15 Feb 2002, David Howells wrote:
> Firstly, in response to me having supplied a patch that made a set of four
> byte-size values as the status area in the task_struct:
>
> | For the future, the biggest thing I'd like to see is actually to make
> | "work" be a bitmap, because the "bytes are atomic" approach simply isn't
> | portable anyway, so we might as well make things _explicitly_ atomic and
> | use bit operations. Otherwise the alpha version of "work" would have to be
> | four bytes per "bit" of information, which sounds really excessive.
As I mentioned before I more like the byte approach, since atomic bit
field handling is quite expensive on most architectures, where a simple
set/clear byte is only one or two instructions, if there is byte
load/store instruction. So I'd really like to see to leave the decision to
the architecture, whether to use bit or byte fields.
> And then after some discussion:
>
> | In particular, there's been all that discussion about cache-coloring the
> | "struct task_struct", and my personal suggestion for that whole can of
> | worms is to have the "struct low_level" be in the one low cache-line, and
> | make it contain a pointer to "struct task_struct" - and just split the two
> | up completely. Then the low-level asm code would never have to even look
> | at "task_struct", it would only look at this stuff.
>
> (struct low_level became thread_info).
That I can agree with. :)
bye, Roman
Roman Zippel wrote:
>
> Hi,
>
> On Fri, 15 Feb 2002, David Howells wrote:
>
> > Firstly, in response to me having supplied a patch that made a set of four
> > byte-size values as the status area in the task_struct:
> >
> > | For the future, the biggest thing I'd like to see is actually to make
> > | "work" be a bitmap, because the "bytes are atomic" approach simply isn't
> > | portable anyway, so we might as well make things _explicitly_ atomic and
> > | use bit operations. Otherwise the alpha version of "work" would have to be
> > | four bytes per "bit" of information, which sounds really excessive.
>
> As I mentioned before I more like the byte approach, since atomic bit
> field handling is quite expensive on most architectures, where a simple
> set/clear byte is only one or two instructions, if there is byte
> load/store instruction. So I'd really like to see to leave the decision to
> the architecture, whether to use bit or byte fields.
We have tons of code already using atomic test_and_set_bit type
stuff... why not just make sure your bit set/clear stuff is fast? :)
Jeff
--
Jeff Garzik | "I went through my candy like hot oatmeal
Building 1024 | through an internally-buttered weasel."
MandrakeSoft | - goats.com
Hi Linus,
Roman Zippel <[email protected]> wrote:
> As I mentioned before I more like the byte approach, since atomic bit
> field handling is quite expensive on most architectures, where a simple
> set/clear byte is only one or two instructions, if there is byte
> load/store instruction. So I'd really like to see to leave the decision to
> the architecture, whether to use bit or byte fields.
Should I move the convenience bit operations back to the arch header, so that
the M68K guys can have byte-sized flags (which they can use TAS/TST on)?
David
Hi,
On Fri, 15 Feb 2002, David Howells wrote:
> Should I move the convenience bit operations back to the arch header, so that
> the M68K guys can have byte-sized flags (which they can use TAS/TST on)?
No tas needed, just byte store, so it's not only for m68k, ppc would
benefit from it too and probably some more.
bye, Roman
Hi,
On Fri, 15 Feb 2002, Jeff Garzik wrote:
> > As I mentioned before I more like the byte approach, since atomic bit
> > field handling is quite expensive on most architectures, where a simple
> > set/clear byte is only one or two instructions, if there is byte
> > load/store instruction. So I'd really like to see to leave the decision to
> > the architecture, whether to use bit or byte fields.
>
> We have tons of code already using atomic test_and_set_bit type
> stuff... why not just make sure your bit set/clear stuff is fast? :)
Because in this case there is no atomic test_and_(clear|set)_bit needed.
We only need to clear the bit/byte before scheduling/signal delivery is
started.
bye, Roman
On Fri, Feb 15, 2002 at 12:56:52PM +0000, David Howells wrote:
> And then after some discussion:
>
> | In particular, there's been all that discussion about cache-coloring the
> | "struct task_struct", and my personal suggestion for that whole can of
> | worms is to have the "struct low_level" be in the one low cache-line, and
> | make it contain a pointer to "struct task_struct" - and just split the two
> | up completely. Then the low-level asm code would never have to even look
> | at "task_struct", it would only look at this stuff.
>
> (struct low_level became thread_info).
I see. Yes, that makes sense. My mistake was following davem's
lead and pulling everything out of struct thread_struct. In
reality, I shouldn't have moved hardly anything and $8 should
still point to current.
r~
On Fri, Feb 15, 2002 at 02:07:55PM +0000, David Howells wrote:
> Should I move the convenience bit operations back to the arch header, so that
> the M68K guys can have byte-sized flags (which they can use TAS/TST on)?
If you provide some way to override the convenience bit operations,
then I suspect that you can let most folks use the byte ops. It's
only older alphas that would absolutely have to play games.
Of course, I now have 9 flag bits defined, so I might not go back
to bytes anyway...
r~