2009-01-06 04:13:53

by Arjan van de Ven

[permalink] [raw]
Subject: [PATCH 1/6] fastboot: Asynchronous function calls to speed up kernel boot

From 15815a54b95e6866ff9532dead9cca4d6a298b54 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <[email protected]>
Date: Sun, 4 Jan 2009 05:32:28 -0800
Subject: [PATCH] fastboot: Asynchronous function calls to speed up kernel boot

Right now, most of the kernel boot is strictly synchronous, such that
various hardware delays are done sequentially.

In order to make the kernel boot faster, this patch introduces
infrastructure to allow doing some of the initialization steps
asynchronously, which will hide significant portions of the hardware delays
in practice.

In order to not change device order and other similar observables, this
patch does NOT do full parallel initialization.

Rather, it operates more in the way an out of order CPU does; the work may
be done out of order and asynchronous, but the observable effects
(instruction retiring for the CPU) are still done in the original sequence.

Signed-off-by: Arjan van de Ven <[email protected]>
---
include/linux/async.h | 21 ++++
init/do_mounts.c | 2 +
init/main.c | 5 +-
kernel/Makefile | 3 +-
kernel/async.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/irq/autoprobe.c | 5 +
kernel/module.c | 2 +
7 files changed, 343 insertions(+), 2 deletions(-)
create mode 100644 include/linux/async.h
create mode 100644 kernel/async.c

diff --git a/include/linux/async.h b/include/linux/async.h
new file mode 100644
index 0000000..678d4fd
--- /dev/null
+++ b/include/linux/async.h
@@ -0,0 +1,21 @@
+/*
+ * async.h: Asynchronous function calls for boot performance
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Arjan van de Ven <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/types.h>
+/*
+ * async_cookie_t: sequence cookie identifying one scheduled call.
+ * async_func_ptr: type of a function that can be scheduled; it receives
+ * the caller's data pointer and the cookie assigned to the call.
+ */
+typedef u64 async_cookie_t;
+typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
+/*
+ * async_schedule() queues ptr to be called with (data, cookie) and returns
+ * that cookie. async_synchronize_cookie() blocks until the async core
+ * reports no unfinished work with a cookie below the one passed in;
+ * async_synchronize_full() does the same for everything scheduled so far.
+ */
+extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data);
+extern void async_synchronize_full(void);
+extern void async_synchronize_cookie(async_cookie_t cookie);
+
diff --git a/init/do_mounts.c b/init/do_mounts.c
index d055b19..b9df336 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/initrd.h>
+#include <linux/async.h>

#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
@@ -372,6 +373,7 @@ void __init prepare_namespace(void)
/* wait for the known devices to complete their probing */
while (driver_probe_done() != 0)
msleep(100);
+ async_synchronize_full();

md_run_setup();

diff --git a/init/main.c b/init/main.c
index cd168eb..40d5373 100644
--- a/init/main.c
+++ b/init/main.c
@@ -63,6 +63,7 @@
#include <linux/signal.h>
#include <linux/idr.h>
#include <linux/ftrace.h>
+#include <linux/async.h>
#include <trace/boot.h>

#include <asm/io.h>
@@ -687,7 +688,7 @@ asmlinkage void __init start_kernel(void)
rest_init();
}

-static int initcall_debug;
+int initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);

int do_one_initcall(initcall_t fn)
@@ -788,6 +789,8 @@ static void run_init_process(char *init_filename)
*/
static int noinline init_post(void)
{
+ /* need to finish all async __init code before freeing the memory */
+ async_synchronize_full();
free_initmem();
unlock_kernel();
mark_rodata_ro();
diff --git a/kernel/Makefile b/kernel/Makefile
index e1c5bf3..2921d90 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
- notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o
+ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
+ async.o

ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
diff --git a/kernel/async.c b/kernel/async.c
new file mode 100644
index 0000000..8ecebf5
--- /dev/null
+++ b/kernel/async.c
@@ -0,0 +1,307 @@
+/*
+ * async.c: Asynchronous function calls for boot performance
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Arjan van de Ven <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+
+/*
+
+Goals and Theory of Operation
+
+The primary goal of this feature is to reduce the kernel boot time,
+by doing various independent hardware delays and discovery operations
+decoupled and not strictly serialized.
+
+More specifically, the asynchronous function call concept allows
+certain operations (primarily during system boot) to happen
+asynchronously, out of order, while these operations still
+have their externally visible parts happen sequentially and in-order.
+(not unlike how out-of-order CPUs retire their instructions in order)
+
+Key to the asynchronous function call implementation is the concept of
+a "sequence cookie" (which, although it has an abstracted type, can be
+thought of as a monotonically incrementing number).
+
+The async core will assign each scheduled event such a sequence cookie and
+pass this to the called functions.
+
+Before doing a globally visible operation, such as registering device
+numbers, the asynchronously called function should call the
+async_synchronize_cookie() function and pass in its own cookie. The
+async_synchronize_cookie() function will make sure that all asynchronous
+operations that were scheduled prior to the operation corresponding with the
+cookie have completed.
+
+Subsystem/driver initialization code that schedules asynchronous probe
+functions, but which shares global resources with other drivers/subsystems
+that do not use the asynchronous call feature, needs to do a full
+synchronization with the async_synchronize_full() function before returning
+from its init function. This is to maintain strict ordering between the
+asynchronous and synchronous parts of the kernel.
+
+*/
+
+#include <linux/async.h>
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <asm/atomic.h>
+
+static async_cookie_t next_cookie = 1;	/* next cookie to hand out; bumped under async_lock */
+static async_cookie_t lowest_in_progress = 1;	/* cached by __recalc_lowest_in_progress() */
+
+
+static LIST_HEAD(async_pending);	/* queued by async_schedule(), not started yet */
+static LIST_HEAD(async_running);	/* entries whose function is executing */
+static DEFINE_SPINLOCK(async_lock);	/* taken around all list and cookie updates */
+
+struct async_entry {
+ struct list_head list;	/* links the entry into async_pending or async_running */
+ async_cookie_t cookie;	/* sequence cookie assigned in async_schedule() */
+ async_func_ptr *func;	/* function to call */
+ void *data;	/* opaque argument passed to func */
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(async_done);	/* woken after each entry completes */
+static DECLARE_WAIT_QUEUE_HEAD(async_new);	/* woken when an entry is queued */
+
+static atomic_t entry_count;	/* entries queued or running */
+static atomic_t thread_count;	/* worker threads accounted for by the manager */
+
+extern int initcall_debug;	/* enables the timing printk()s in this file */
+
+
+/*
+ * Recompute lowest_in_progress: the smallest cookie whose work has not
+ * completed yet.
+ *
+ * Entries are taken from the head of async_pending and appended to
+ * async_running, so every running entry carries a lower cookie than any
+ * pending one. The running list therefore has to be consulted first;
+ * looking at the pending list first would report a cookie that is too
+ * high and release waiters while older work is still executing.
+ * When both lists are empty nothing is outstanding and next_cookie
+ * serves as "infinity".
+ *
+ * MUST be called with the lock held!
+ */
+static void __recalc_lowest_in_progress(void)
+{
+	struct async_entry *entry;
+
+	if (!list_empty(&async_running)) {
+		entry = list_first_entry(&async_running,
+					 struct async_entry, list);
+		lowest_in_progress = entry->cookie;
+	} else if (!list_empty(&async_pending)) {
+		entry = list_first_entry(&async_pending,
+					 struct async_entry, list);
+		lowest_in_progress = entry->cookie;
+	} else {
+		/* nothing in progress... next_cookie is "infinity" */
+		lowest_in_progress = next_cookie;
+	}
+}
+/*
+ * Pick the first pending entry and run it.
+ *
+ * Called from the worker threads. The entry is taken from the head of
+ * async_pending, kept on async_running while its function executes with
+ * async_lock released, then unlinked and freed. If the pending list turns
+ * out to be empty once the lock is held, nothing is done.
+ */
+static void run_one_entry(void)
+{
+ unsigned long flags;
+ struct async_entry *entry;
+ ktime_t calltime, delta, rettime;
+
+ /* 1) pick one task from the pending queue */
+
+ spin_lock_irqsave(&async_lock, flags);
+ if (list_empty(&async_pending))
+ goto out;
+ entry = list_first_entry(&async_pending, struct async_entry, list);
+
+ /* 2) move it to the running queue */
+ list_del(&entry->list);
+ list_add_tail(&entry->list, &async_running);
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /*
+  * 3) run it with the lock released (and print duration).
+  * The duration is ktime_to_ns() >> 10, i.e. nanoseconds divided by
+  * 1024, printed as an approximate microsecond count.
+  * NOTE(review): initcall_debug is tested twice and calltime is only
+  * assigned under the first test; presumably the flag is not expected
+  * to change in between - confirm.
+  */
+ if (initcall_debug) {
+ printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+ calltime = ktime_get();
+ }
+ entry->func(entry->data, entry->cookie);
+ if (initcall_debug) {
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
+ entry->func, ktime_to_ns(delta) >> 10);
+ }
+
+ /* 4) remove it from the running queue */
+ spin_lock_irqsave(&async_lock, flags);
+ list_del(&entry->list);
+
+ /* 5) free the entry and drop it from the outstanding-entry count */
+ kfree(entry);
+ atomic_dec(&entry_count);
+
+ /* 6) update the lowest_in_progress value (still under the lock) */
+ __recalc_lowest_in_progress();
+
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /* 7) wake up any waiters so they re-check lowest_in_progress. */
+ wake_up(&async_done);
+ return;
+
+out:
+ spin_unlock_irqrestore(&async_lock, flags);
+}
+
+/*
+ * async_schedule - queue ptr to be called asynchronously as ptr(data, cookie)
+ *
+ * Allocates an entry with GFP_ATOMIC, assigns it the next sequence cookie
+ * under async_lock, appends it to async_pending and wakes the async_new
+ * wait queue, on which the worker and manager threads wait.
+ *
+ * If the allocation fails, a cookie is still consumed and ptr is called
+ * directly, in the caller's context, before this function returns.
+ *
+ * Returns the cookie assigned to this call.
+ */
+async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
+{
+ struct async_entry *entry;
+ unsigned long flags;
+ async_cookie_t newcookie;
+
+
+ /* allow irq-off callers */
+ entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);
+ if (!entry) {
+ spin_lock_irqsave(&async_lock, flags);
+ newcookie = next_cookie++;
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /* low on memory.. run synchronously */
+ ptr(data, newcookie);
+ return newcookie;
+ }
+ entry->func = ptr;
+ entry->data = data;
+
+ spin_lock_irqsave(&async_lock, flags);
+ newcookie = entry->cookie = next_cookie++;
+ list_add_tail(&entry->list, &async_pending);
+ atomic_inc(&entry_count);
+ spin_unlock_irqrestore(&async_lock, flags);
+ wake_up(&async_new);
+ return newcookie;
+}
+EXPORT_SYMBOL_GPL(async_schedule);
+/*
+ * async_synchronize_full - wait for all asynchronous work scheduled so far
+ *
+ * Passes the current next_cookie (one past the newest cookie handed out)
+ * to async_synchronize_cookie(). next_cookie is read here without taking
+ * async_lock.
+ */
+void async_synchronize_full(void)
+{
+ async_synchronize_cookie(next_cookie);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_full);
+/*
+ * async_synchronize_cookie - wait for work scheduled before cookie
+ *
+ * Sleeps on the async_done wait queue until lowest_in_progress >= cookie.
+ * lowest_in_progress is read without async_lock; it is re-evaluated each
+ * time run_one_entry() wakes async_done.
+ *
+ * With initcall_debug set, the caller's pid is printed before and after
+ * the wait, together with the time waited (ktime_to_ns() >> 10, i.e.
+ * nanoseconds divided by 1024, printed as usec).
+ */
+void async_synchronize_cookie(async_cookie_t cookie)
+{
+ ktime_t starttime, delta, endtime;
+
+ if (initcall_debug) {
+ printk("async_waiting @ %i\n", task_pid_nr(current));
+ starttime = ktime_get();
+ }
+
+ wait_event(async_done, lowest_in_progress >= cookie);
+
+ if (initcall_debug) {
+ endtime = ktime_get();
+ delta = ktime_sub(endtime, starttime);
+
+ printk("async_continuing @ %i after %lli usec\n",
+ task_pid_nr(current), ktime_to_ns(delta) >> 10);
+ }
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+
+/*
+ * Worker thread body.
+ *
+ * Registers itself on the async_new wait queue and then loops: if
+ * async_pending is non-empty it runs one entry, otherwise it sleeps for
+ * up to HZ ticks. ret stays HZ after an entry was run, so the sign-off
+ * path below is only entered when schedule_timeout() returned 0.
+ * Always returns 0.
+ */
+static int async_thread(void *unused)
+{
+ DECLARE_WAITQUEUE(wq, current);
+ add_wait_queue(&async_new, &wq);
+
+ while (!kthread_should_stop()) {
+ int ret = HZ;
+ set_current_state(TASK_INTERRUPTIBLE);
+ /*
+ * check the list head without lock.. false positives
+ * are dealt with inside run_one_entry() while holding
+ * the lock.
+ */
+ rmb();
+ if (!list_empty(&async_pending))
+ run_one_entry();
+ else
+ ret = schedule_timeout(HZ);
+
+ if (ret == 0) {
+ /*
+ * we timed out, this means we as thread are redundant.
+ * we sign off and die, but to avoid any races there
+ * is a last-straw check to see if work snuck in.
+ */
+ atomic_dec(&thread_count);
+ wmb(); /* manager must see our departure first */
+ if (list_empty(&async_pending))
+ break;
+ /*
+ * woops work came in between us timing out and us
+ * signing off; we need to stay alive and keep working,
+ * so undo the thread_count decrement.
+ */
+ atomic_inc(&thread_count);
+ }
+ }
+ remove_wait_queue(&async_new, &wq);
+
+ return 0;
+}
+/*
+ * Manager thread body.
+ *
+ * Waits on async_new and, each time it runs, starts worker threads until
+ * the worker count matches the number of outstanding entries or the
+ * max_threads cap is reached. Always returns 0.
+ *
+ * NOTE(review): the result of kthread_run() is not checked, so
+ * thread_count is incremented even if no worker could be created -
+ * confirm whether that is acceptable.
+ */
+static int async_manager_thread(void *unused)
+{
+ int max_threads;
+ DECLARE_WAITQUEUE(wq, current);
+ add_wait_queue(&async_new, &wq);
+
+ while (!kthread_should_stop()) {
+ int tc, ec;
+
+ /*
+ * Maximum number of worker threads.
+ * Even on the smallest machine we want 8
+ * Scaling by 4 per logical CPU
+ * But no more than 256 to not overload stuff too much
+ * (and yes these are magic numbers that might need tuning)
+ * Calculated dynamic because early on the nr of online cpus
+ * is 1...
+ */
+ max_threads = 4 + 4 * num_online_cpus();
+ if (max_threads > 256)
+ max_threads = 256;
+
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ tc = atomic_read(&thread_count);
+ rmb();
+ ec = atomic_read(&entry_count);
+
+ while (tc < ec && tc < max_threads) {
+ kthread_run(async_thread, NULL, "async/%i", tc);
+ atomic_inc(&thread_count);
+ tc++;
+ }
+
+ schedule();
+ }
+ remove_wait_queue(&async_new, &wq);
+
+ return 0;
+}
+/*
+ * Start the manager thread, named "async/mgr". Registered below as a
+ * core_initcall. The kthread_run() result is ignored and 0 is always
+ * returned.
+ */
+static int __init async_init(void)
+{
+ kthread_run(async_manager_thread, NULL, "async/mgr");
+ return 0;
+}
+
+core_initcall(async_init);
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index cc0f732..1de9700 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
+#include <linux/async.h>

#include "internals.h"

@@ -34,6 +35,10 @@ unsigned long probe_irq_on(void)
unsigned int status;
int i;

+ /*
+ * quiesce the kernel, or at least the asynchronous portion
+ */
+ async_synchronize_full();
mutex_lock(&probing_active);
/*
* something may have generated an irq long ago and we want to
diff --git a/kernel/module.c b/kernel/module.c
index dd2a541..da3ad68 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -51,6 +51,7 @@
#include <asm/sections.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
+#include <linux/async.h>

#if 0
#define DEBUGP printk
@@ -809,6 +810,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
mod->exit();
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
+ async_synchronize_full();
mutex_lock(&module_mutex);
/* Store the name of the last unloaded module for diagnostic purposes */
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
--
1.6.0.6



--
Arjan van de Ven Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org


2009-01-06 04:25:46

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/6] fastboot: Asynchronous function calls to speed up kernel boot

On Mon, 5 Jan 2009 20:10:41 -0800 Arjan van de Ven <[email protected]> wrote:

> kernel/async.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++++

Am still wondering if this is unacceptably duplicative of dhowells's
slow-work infrastructure: http://lkml.org/lkml/2008/11/20/193

2009-01-06 04:35:29

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH 1/6] fastboot: Asynchronous function calls to speed up kernel boot

On Mon, 5 Jan 2009 20:25:14 -0800
Andrew Morton <[email protected]> wrote:

> On Mon, 5 Jan 2009 20:10:41 -0800 Arjan van de Ven
> <[email protected]> wrote:
>
> > kernel/async.c | 307
> > ++++++++++++++++++++++++++++++++++++++++++++++++
>
> Am still wondering if this is unacceptably duplicative of dhowells's
> slow-work infrastructure: http://lkml.org/lkml/2008/11/20/193
>
>

I talked to David about this today (as I wrote in 0/6).
Based on that discussion we got to the conclusion we have incompatible
requirements. He needs priorities to be honored (which means rather out
of order execution of the tasks) while I need strict ordering (for the
synchronization). While it's not entirely impossible to combine those
two into one system, the resulting complexity isn't really worth it yet.

Of the 307 lines, only 100 are actual thread pool code (the rest is
synchronization and admin code), and about half of those 100 lines are
comments.


--
Arjan van de Ven Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org

2009-01-06 13:55:37

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH 1/6] fastboot: Asynchronous function calls to speed up kernel boot

On Mon, 5 Jan 2009 20:25:14 -0800
Andrew Morton <[email protected]> wrote:

> On Mon, 5 Jan 2009 20:10:41 -0800 Arjan van de Ven
> <[email protected]> wrote:
>
> > kernel/async.c | 307
> > ++++++++++++++++++++++++++++++++++++++++++++++++
>
> Am still wondering if this is unacceptably duplicative of dhowells's
> slow-work infrastructure: http://lkml.org/lkml/2008/11/20/193

ok having looked at it a lot more; I can add priorities to the async
function calls, at which point they are a superset to the slow-work
infrastructure basically. I'll need to check with David to make 100%
sure the solution I have in mind will work for him, but I suspect it
will.


--
Arjan van de Ven Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org

2009-01-14 05:31:55

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH 1/6] fastboot: Asynchronous function calls to speed up kernel boot

On Tue, 2009-01-06 at 12:10 +0800, Arjan van de Ven wrote:
> From 15815a54b95e6866ff9532dead9cca4d6a298b54 Mon Sep 17 00:00:00 2001
> From: Arjan van de Ven <[email protected]>
> Date: Sun, 4 Jan 2009 05:32:28 -0800
> Subject: [PATCH] fastboot: Asynchronous function calls to speed up kernel boot
>
> Right now, most of the kernel boot is strictly synchronous, such that
> various hardware delays are done sequentially.
>
> In order to make the kernel boot faster, this patch introduces
> infrastructure to allow doing some of the initialization steps
> asynchronously, which will hide significant portions of the hardware delays
> in practice.
>
> In order to not change device order and other similar observables, this
> patch does NOT do full parallel initialization.
>
> Rather, it operates more in the way an out of order CPU does; the work may
> be done out of order and asynchronous, but the observable effects
> (instruction retiring for the CPU) are still done in the original sequence.
>
> Signed-off-by: Arjan van de Ven <[email protected]>

> +
> +void async_synchronize_cookie(async_cookie_t cookie)
> +{
> + ktime_t starttime, delta, endtime;
> +
> + if (initcall_debug) {
> + printk("async_waiting @ %i\n", task_pid_nr(current));
> + starttime = ktime_get();
> + }
> +
> + wait_event(async_done, lowest_in_progress >= cookie);
> +

In some cases, we only want to wait for a specific cookie
rather than all the cookies smaller than it.

For example:
device cookie
ACPI battery 1
ata port 0 2
ata port 1 3
ata port 0 and port 1 in the same host can not be probed in parallel.

In this case, ata port1 should only wait for cookie 2 rather than both 1 and 2.

how about the patch below? (it's just a prototype and I have not tested it yet)
If it's okay, I'll do some tricks in the libata-core so that port1 can get the cookie of port 0.

Introduces two new interfaces
async_synchronize_one_cookie
async_synchronize_one_cookie_special
users can use these to wait for a specific cookie.

Signed-off-by: Zhang Rui <[email protected]>
---
drivers/ata/libata-core.c | 4 +-
include/linux/async.h | 6 ++-
kernel/async.c | 82 +++++++++++++++++++++++++++++++++++++++++-----
3 files changed, 80 insertions(+), 12 deletions(-)

Index: linux-2.6/kernel/async.c
===================================================================
--- linux-2.6.orig/kernel/async.c
+++ linux-2.6/kernel/async.c
@@ -87,6 +87,44 @@ extern int initcall_debug;
/*
* MUST be called with the lock held!
*/
+static int __cookie_is_done(struct list_head *running, async_cookie_t cookie)
+{
+ struct async_entry *entry;
+
+ if (!list_empty(running)) {
+ list_for_each_entry(entry, running, list) {
+ if (entry->cookie > cookie)
+ break;
+ if (entry->cookie == cookie)
+ return 0;
+ }
+ }
+
+ if (!list_empty(&async_pending)) {
+ list_for_each_entry(entry, &async_pending, list) {
+ if (entry->cookie > cookie)
+ break;
+ if (entry->cookie == cookie)
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static int cookie_is_done(struct list_head *running, async_cookie_t cookie)
+{
+ unsigned long flags;
+ async_cookie_t ret;
+
+ spin_lock_irqsave(&async_lock, flags);
+ ret = __cookie_is_done(running, cookie);
+ spin_unlock_irqrestore(&async_lock, flags);
+ return ret;
+}
+
+/*
+ * MUST be called with the lock held!
+ */
static async_cookie_t __lowest_in_progress(struct list_head *running)
{
struct async_entry *entry;
@@ -220,18 +258,19 @@ EXPORT_SYMBOL_GPL(async_schedule_special
void async_synchronize_full(void)
{
do {
- async_synchronize_cookie(next_cookie);
+ async_synchronize_cookies(next_cookie);
} while (!list_empty(&async_running) || !list_empty(&async_pending));
}
EXPORT_SYMBOL_GPL(async_synchronize_full);

void async_synchronize_full_special(struct list_head *list)
{
- async_synchronize_cookie_special(next_cookie, list);
+ async_synchronize_cookies_special(next_cookie, list);
}
EXPORT_SYMBOL_GPL(async_synchronize_full_special);

-void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
+static void __async_synchronize_cookie_special(async_cookie_t cookie,
+ struct list_head *running, int type)
{
ktime_t starttime, delta, endtime;

@@ -240,7 +279,10 @@ void async_synchronize_cookie_special(as
starttime = ktime_get();
}

- wait_event(async_done, lowest_in_progress(running) >= cookie);
+ if (type)
+ wait_event(async_done, lowest_in_progress(running) >= cookie);
+ else
+ wait_event(async_done, cookie_is_done(running, cookie));

if (initcall_debug && system_state == SYSTEM_BOOTING) {
endtime = ktime_get();
@@ -250,13 +292,37 @@ void async_synchronize_cookie_special(as
task_pid_nr(current), ktime_to_ns(delta) >> 10);
}
}
-EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);

-void async_synchronize_cookie(async_cookie_t cookie)
+void async_synchronize_cookies_special(async_cookie_t cookie, struct list_head *running)
+{
+ __async_synchronize_cookie_special(cookie, running, 1);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookies_special);
+
+void async_synchronize_cookies(async_cookie_t cookie)
+{
+ __async_synchronize_cookie_special(cookie, &async_running, 1);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookies);
+
+
+/*
+ * synchronize a specific cookie
+ * block until the entry with a specific cookie is done.
+ * Note that waiting for a cookie while holding it is not allowed,
+ * because it may cause a deadlock issue.
+ */
+void async_synchronize_one_cookie_special(async_cookie_t cookie, struct list_head *running)
+{
+ __async_synchronize_cookie_special(cookie, running, 0);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_one_cookie_special);
+
+void async_synchronize_one_cookie(async_cookie_t cookie)
{
- async_synchronize_cookie_special(cookie, &async_running);
+ __async_synchronize_cookie_special(cookie, &async_running, 0);
}
-EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+EXPORT_SYMBOL_GPL(async_synchronize_one_cookie);


static int async_thread(void *unused)
Index: linux-2.6/include/linux/async.h
===================================================================
--- linux-2.6.orig/include/linux/async.h
+++ linux-2.6/include/linux/async.h
@@ -20,6 +20,8 @@ extern async_cookie_t async_schedule(asy
extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list);
extern void async_synchronize_full(void);
extern void async_synchronize_full_special(struct list_head *list);
-extern void async_synchronize_cookie(async_cookie_t cookie);
-extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list);
+extern void async_synchronize_cookies(async_cookie_t cookie);
+extern void async_synchronize_cookies_special(async_cookie_t cookie, struct list_head *list);
+extern void async_synchronize_one_cookie(async_cookie_t cookie);
+extern void async_synchronize_one_cookie_special(async_cookie_t cookie, struct list_head *list);

Index: linux-2.6/drivers/ata/libata-core.c
===================================================================
--- linux-2.6.orig/drivers/ata/libata-core.c
+++ linux-2.6/drivers/ata/libata-core.c
@@ -5929,7 +5929,7 @@ static void async_port_probe(void *data,
* don't need to wait for port 0, only for later ports.
*/
if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0)
- async_synchronize_cookie(cookie);
+ async_synchronize_cookies(cookie);

/* probe */
if (ap->ops->error_handler) {
@@ -5969,7 +5969,7 @@ static void async_port_probe(void *data,
}

/* in order to keep device order, we need to synchronize at this point */
- async_synchronize_cookie(cookie);
+ async_synchronize_cookies(cookie);

ata_scsi_scan_host(ap, 1);





Attachments:
patch-async-introduce-new-interface (6.12 kB)

2009-01-14 09:03:10

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH 1/6] fastboot: Asynchronous function calls to speed up kernel boot

On Wed, 14 Jan 2009 13:32:43 +0800
Zhang Rui <[email protected]> wrote:
> > +
> > + wait_event(async_done, lowest_in_progress >= cookie);
> > +
>
> In some cases, we only want to wait for a specific cookie
> rather than all the cookies smaller than it.
>
> For example:
> device cookie
> ACPI battery 1
> ata port 0 2
> ata port 1 3
> ata port 0 and port 1 in the same host can not be probed in parallel.
>
> In this case, ata port1 should only wait for cookie 2 rather than
> both 1 and 2.
>

I really don't want to go this way; ordering is really important and I
don't want to break this.

If you really have a fully separate domain (and there are only a few of
those in our kernel), then use a special run list.



--
Arjan van de Ven Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org

2009-01-15 05:54:25

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH 1/6] fastboot: Asynchronous function calls to speed up kernel boot

On Tue, 2009-01-06 at 12:10 +0800, Arjan van de Ven wrote:
> From 15815a54b95e6866ff9532dead9cca4d6a298b54 Mon Sep 17 00:00:00 2001
> From: Arjan van de Ven <[email protected]>
> Date: Sun, 4 Jan 2009 05:32:28 -0800
> Subject: [PATCH] fastboot: Asynchronous function calls to speed up kernel boot
>
> Right now, most of the kernel boot is strictly synchronous, such that
> various hardware delays are done sequentially.
>
> In order to make the kernel boot faster, this patch introduces
> infrastructure to allow doing some of the initialization steps
> asynchronously, which will hide significant portions of the hardware delays
> in practice.
>
> In order to not change device order and other similar observables, this
> patch does NOT do full parallel initialization.
>
> Rather, it operates more in the way an out of order CPU does; the work may
> be done out of order and asynchronous, but the observable effects
> (instruction retiring for the CPU) are still done in the original sequence.
>
> Signed-off-by: Arjan van de Ven <[email protected]>
> ---
> include/linux/async.h | 21 ++++
> init/do_mounts.c | 2 +
> init/main.c | 5 +-
> kernel/Makefile | 3 +-
> kernel/async.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++++
> kernel/irq/autoprobe.c | 5 +
> kernel/module.c | 2 +
> 7 files changed, 343 insertions(+), 2 deletions(-)
> create mode 100644 include/linux/async.h
> create mode 100644 kernel/async.c
> +/*
> + * pick the first pending entry and run it
> + */
> +static void run_one_entry(void)
> +{
> + unsigned long flags;
> + struct async_entry *entry;
> + ktime_t calltime, delta, rettime;
> +
> + /* 1) pick one task from the pending queue */
> +
> + spin_lock_irqsave(&async_lock, flags);
> + if (list_empty(&async_pending))
> + goto out;
> + entry = list_first_entry(&async_pending, struct async_entry, list);
> +
> + /* 2) move it to the running queue */
> + list_del(&entry->list);
> + list_add_tail(&entry->list, &async_running);
> + spin_unlock_irqrestore(&async_lock, flags);
> +
Another question:
we should move the entry to the proper run list, shouldn't we?

Signed-off-by: Zhang Rui <[email protected]>
---
kernel/async.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-2.6/kernel/async.c
===================================================================
--- linux-2.6.orig/kernel/async.c
+++ linux-2.6/kernel/async.c
@@ -133,7 +133,7 @@ static void run_one_entry(void)

/* 2) move it to the running queue */
list_del(&entry->list);
- list_add_tail(&entry->list, &async_running);
+ list_add_tail(&entry->list, entry->running);
spin_unlock_irqrestore(&async_lock, flags);

/* 3) run it (and print duration)*/


> + /* 3) run it (and print duration)*/
> + if (initcall_debug) {
> + printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
> + calltime = ktime_get();
> + }
> + entry->func(entry->data, entry->cookie);
> + if (initcall_debug) {
> + rettime = ktime_get();
> + delta = ktime_sub(rettime, calltime);
> + printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
> + entry->func, ktime_to_ns(delta) >> 10);
> + }
> +
> + /* 4) remove it from the running queue */
> + spin_lock_irqsave(&async_lock, flags);
> + list_del(&entry->list);
> +
> + /* 5) free the entry */
> + kfree(entry);
> + atomic_dec(&entry_count);
> +
> + /* 6) update the lowest_in_progress value */
> + __recalc_lowest_in_progress();
> +
> + spin_unlock_irqrestore(&async_lock, flags);
> +
> + /* 7) wake up any waiters. */
> + wake_up(&async_done);
> + return;
> +
> +out:
> + spin_unlock_irqrestore(&async_lock, flags);
> +}
> +
> +
> +async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
> +{
> + struct async_entry *entry;
> + unsigned long flags;
> + async_cookie_t newcookie;
> +
> +
> + /* allow irq-off callers */
> + entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);
> + if (!entry) {
> + spin_lock_irqsave(&async_lock, flags);
> + newcookie = next_cookie++;
> + spin_unlock_irqrestore(&async_lock, flags);
> +
> + /* low on memory.. run synchronously */
> + ptr(data, newcookie);
> + return newcookie;
> + }
> + entry->func = ptr;
> + entry->data = data;
> +
> + spin_lock_irqsave(&async_lock, flags);
> + newcookie = entry->cookie = next_cookie++;
> + list_add_tail(&entry->list, &async_pending);
> + atomic_inc(&entry_count);
> + spin_unlock_irqrestore(&async_lock, flags);
> + wake_up(&async_new);
> + return newcookie;
> +}
> +EXPORT_SYMBOL_GPL(async_schedule);
> +