version 3 of the async function call patches
* Dropped the ACPI part; it broke in surprising ways and needs a rethink
(working with Len and co on that)
* Included asynchronous delete()
There is still no integration with the out-of-tree slow-work patch;
the slow-work patch does really weird things with refcounts that make it
really specific to cachefs and, frankly, I think it's a rather
weird design.
The overlap is maybe 200 lines of code at most, of which 100+ would
need to be added just to deal with the various weirdnesses. I really
don't think this is a good idea or a fair requirement on the async
function call patch series.
The following changes since commit 8903709b054a8dafe4e8c6d9a6444034d7aba36f:
Harvey Harrison (1):
xtensa: introduce swab.h
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/arjan/linux-2.6-async.git master
Arjan van de Ven (7):
async: Asynchronous function calls to speed up kernel boot
fastboot: make scsi probes asynchronous
fastboot: make the libata port scan asynchronous
fastboot: Make libata initialization even more async
async: make the final inode deletion an asynchronous event
bootchart: improve output based on Dave Jones' feedback
async: don't do the initcall stuff post boot
drivers/ata/libata-core.c | 96 +++++++-------
drivers/scsi/scsi_scan.c | 3 +
drivers/scsi/sd.c | 109 ++++++++++------
fs/inode.c | 20 ++-
fs/super.c | 10 ++
include/linux/async.h | 25 ++++
include/linux/fs.h | 5 +
init/do_mounts.c | 2 +
init/main.c | 5 +-
kernel/Makefile | 3 +-
kernel/async.c | 321 +++++++++++++++++++++++++++++++++++++++++++++
kernel/irq/autoprobe.c | 5 +
kernel/module.c | 2 +
scripts/bootgraph.pl | 16 ++-
14 files changed, 521 insertions(+), 101 deletions(-)
create mode 100644 include/linux/async.h
create mode 100644 kernel/async.c
--
Arjan van de Ven -- Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org
From 22a9d645677feefd402befd02edd59b122289ef1 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <[email protected]>
Date: Wed, 7 Jan 2009 08:45:46 -0800
Subject: [PATCH] async: Asynchronous function calls to speed up kernel boot
Right now, most of the kernel boot is strictly synchronous, such that
various hardware delays are done sequentially.
In order to make the kernel boot faster, this patch introduces
infrastructure to allow doing some of the initialization steps
asynchronously, which will hide significant portions of the hardware delays
in practice.
In order to not change device order and other similar observables, this
patch does NOT do full parallel initialization.
Rather, it operates more in the way an out-of-order CPU does; the work may
be done out of order and asynchronously, but the observable effects
(instruction retiring for the CPU) are still done in the original sequence.
Signed-off-by: Arjan van de Ven <[email protected]>
---
include/linux/async.h | 25 ++++
init/do_mounts.c | 2 +
init/main.c | 5 +-
kernel/Makefile | 3 +-
kernel/async.c | 321 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/irq/autoprobe.c | 5 +
kernel/module.c | 2 +
7 files changed, 361 insertions(+), 2 deletions(-)
create mode 100644 include/linux/async.h
create mode 100644 kernel/async.c
diff --git a/include/linux/async.h b/include/linux/async.h
new file mode 100644
index 0000000..c4ecacd
--- /dev/null
+++ b/include/linux/async.h
@@ -0,0 +1,25 @@
+/*
+ * async.h: Asynchronous function calls for boot performance
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Arjan van de Ven <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+typedef u64 async_cookie_t;
+typedef void (async_func_ptr) (void *data, async_cookie_t cookie);
+
+extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data);
+extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list);
+extern void async_synchronize_full(void);
+extern void async_synchronize_full_special(struct list_head *list);
+extern void async_synchronize_cookie(async_cookie_t cookie);
+extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list);
+
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 5efca73..708105e 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/initrd.h>
+#include <linux/async.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
@@ -372,6 +373,7 @@ void __init prepare_namespace(void)
/* wait for the known devices to complete their probing */
while (driver_probe_done() != 0)
msleep(100);
+ async_synchronize_full();
md_run_setup();
diff --git a/init/main.c b/init/main.c
index b5a892c..f66715d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -62,6 +62,7 @@
#include <linux/signal.h>
#include <linux/idr.h>
#include <linux/ftrace.h>
+#include <linux/async.h>
#include <trace/boot.h>
#include <asm/io.h>
@@ -684,7 +685,7 @@ asmlinkage void __init start_kernel(void)
rest_init();
}
-static int initcall_debug;
+int initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
int do_one_initcall(initcall_t fn)
@@ -785,6 +786,8 @@ static void run_init_process(char *init_filename)
*/
static noinline int init_post(void)
{
+ /* need to finish all async __init code before freeing the memory */
+ async_synchronize_full();
free_initmem();
unlock_kernel();
mark_rodata_ro();
diff --git a/kernel/Makefile b/kernel/Makefile
index e1c5bf3..2921d90 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,7 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
- notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o
+ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
+ async.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
diff --git a/kernel/async.c b/kernel/async.c
new file mode 100644
index 0000000..afaa8a6
--- /dev/null
+++ b/kernel/async.c
@@ -0,0 +1,321 @@
+/*
+ * async.c: Asynchronous function calls for boot performance
+ *
+ * (C) Copyright 2009 Intel Corporation
+ * Author: Arjan van de Ven <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+
+/*
+
+Goals and Theory of Operation
+
+The primary goal of this feature is to reduce the kernel boot time,
+by running various independent hardware delays and discovery operations
+decoupled from each other rather than strictly serialized.
+
+More specifically, the asynchronous function call concept allows
+certain operations (primarily during system boot) to happen
+asynchronously, out of order, while these operations still
+have their externally visible parts happen sequentially and in-order.
+(not unlike how out-of-order CPUs retire their instructions in order)
+
+Key to the asynchronous function call implementation is the concept of
+a "sequence cookie" (which, although it has an abstracted type, can be
+thought of as a monotonically incrementing number).
+
+The async core will assign each scheduled event such a sequence cookie and
+pass this to the called functions.
+
+The asynchronously called function should, before doing a globally visible
+operation such as registering device numbers, call
+async_synchronize_cookie() and pass in its own cookie. The
+async_synchronize_cookie() function will make sure that all asynchronous
+operations that were scheduled prior to the operation corresponding with the
+cookie have completed.
+
+Subsystem/driver initialization code that scheduled asynchronous probe
+functions, but which shares global resources with other drivers/subsystems
+that do not use the asynchronous call feature, needs to do a full
+synchronization with the async_synchronize_full() function before returning
+from its init function. This is to maintain strict ordering between the
+asynchronous and synchronous parts of the kernel.
+
+*/
+
+#include <linux/async.h>
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <asm/atomic.h>
+
+static async_cookie_t next_cookie = 1;
+
+#define MAX_THREADS 256
+#define MAX_WORK 32768
+
+static LIST_HEAD(async_pending);
+static LIST_HEAD(async_running);
+static DEFINE_SPINLOCK(async_lock);
+
+struct async_entry {
+ struct list_head list;
+ async_cookie_t cookie;
+ async_func_ptr *func;
+ void *data;
+ struct list_head *running;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(async_done);
+static DECLARE_WAIT_QUEUE_HEAD(async_new);
+
+static atomic_t entry_count;
+static atomic_t thread_count;
+
+extern int initcall_debug;
+
+
+/*
+ * MUST be called with the lock held!
+ */
+static async_cookie_t __lowest_in_progress(struct list_head *running)
+{
+ struct async_entry *entry;
+ if (!list_empty(&async_pending)) {
+ entry = list_first_entry(&async_pending,
+ struct async_entry, list);
+ return entry->cookie;
+ } else if (!list_empty(running)) {
+ entry = list_first_entry(running,
+ struct async_entry, list);
+ return entry->cookie;
+ } else {
+ /* nothing in progress... next_cookie is "infinity" */
+ return next_cookie;
+ }
+
+}
+/*
+ * pick the first pending entry and run it
+ */
+static void run_one_entry(void)
+{
+ unsigned long flags;
+ struct async_entry *entry;
+ ktime_t calltime, delta, rettime;
+
+ /* 1) pick one task from the pending queue */
+
+ spin_lock_irqsave(&async_lock, flags);
+ if (list_empty(&async_pending))
+ goto out;
+ entry = list_first_entry(&async_pending, struct async_entry, list);
+
+ /* 2) move it to the running queue */
+ list_del(&entry->list);
+ list_add_tail(&entry->list, &async_running);
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /* 3) run it (and print duration)*/
+ if (initcall_debug) {
+ printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+ calltime = ktime_get();
+ }
+ entry->func(entry->data, entry->cookie);
+ if (initcall_debug) {
+ rettime = ktime_get();
+ delta = ktime_sub(rettime, calltime);
+ printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
+ entry->func, ktime_to_ns(delta) >> 10);
+ }
+
+ /* 4) remove it from the running queue */
+ spin_lock_irqsave(&async_lock, flags);
+ list_del(&entry->list);
+
+ /* 5) free the entry */
+ kfree(entry);
+ atomic_dec(&entry_count);
+
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /* 6) wake up any waiters. */
+ wake_up(&async_done);
+ return;
+
+out:
+ spin_unlock_irqrestore(&async_lock, flags);
+}
+
+
+static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running)
+{
+ struct async_entry *entry;
+ unsigned long flags;
+ async_cookie_t newcookie;
+
+
+ /* allow irq-off callers */
+ entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);
+
+ /*
+ * If we're out of memory or if there's too much work
+ * pending already, we execute synchronously.
+ */
+ if (!entry || atomic_read(&entry_count) > MAX_WORK) {
+ kfree(entry);
+ spin_lock_irqsave(&async_lock, flags);
+ newcookie = next_cookie++;
+ spin_unlock_irqrestore(&async_lock, flags);
+
+ /* low on memory.. run synchronously */
+ ptr(data, newcookie);
+ return newcookie;
+ }
+ entry->func = ptr;
+ entry->data = data;
+ entry->running = running;
+
+ spin_lock_irqsave(&async_lock, flags);
+ newcookie = entry->cookie = next_cookie++;
+ list_add_tail(&entry->list, &async_pending);
+ atomic_inc(&entry_count);
+ spin_unlock_irqrestore(&async_lock, flags);
+ wake_up(&async_new);
+ return newcookie;
+}
+
+async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
+{
+ return __async_schedule(ptr, data, &async_pending);
+}
+EXPORT_SYMBOL_GPL(async_schedule);
+
+async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
+{
+ return __async_schedule(ptr, data, running);
+}
+EXPORT_SYMBOL_GPL(async_schedule_special);
+
+void async_synchronize_full(void)
+{
+ async_synchronize_cookie(next_cookie);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_full);
+
+void async_synchronize_full_special(struct list_head *list)
+{
+ async_synchronize_cookie_special(next_cookie, list);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_full_special);
+
+void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
+{
+ ktime_t starttime, delta, endtime;
+
+ if (initcall_debug) {
+ printk("async_waiting @ %i\n", task_pid_nr(current));
+ starttime = ktime_get();
+ }
+
+ wait_event(async_done, __lowest_in_progress(running) >= cookie);
+
+ if (initcall_debug) {
+ endtime = ktime_get();
+ delta = ktime_sub(endtime, starttime);
+
+ printk("async_continuing @ %i after %lli usec\n",
+ task_pid_nr(current), ktime_to_ns(delta) >> 10);
+ }
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
+
+void async_synchronize_cookie(async_cookie_t cookie)
+{
+ async_synchronize_cookie_special(cookie, &async_running);
+}
+EXPORT_SYMBOL_GPL(async_synchronize_cookie);
+
+
+static int async_thread(void *unused)
+{
+ DECLARE_WAITQUEUE(wq, current);
+ add_wait_queue(&async_new, &wq);
+
+ while (!kthread_should_stop()) {
+ int ret = HZ;
+ set_current_state(TASK_INTERRUPTIBLE);
+ /*
+ * check the list head without lock.. false positives
+ * are dealt with inside run_one_entry() while holding
+ * the lock.
+ */
+ rmb();
+ if (!list_empty(&async_pending))
+ run_one_entry();
+ else
+ ret = schedule_timeout(HZ);
+
+ if (ret == 0) {
+ /*
+ * we timed out; this means this thread is redundant.
+ * we sign off and die, but to avoid any races there
+ * is a last-straw check to see if work snuck in.
+ */
+ atomic_dec(&thread_count);
+ wmb(); /* manager must see our departure first */
+ if (list_empty(&async_pending))
+ break;
+ /*
+ * whoops, work came in between us timing out and us
+ * signing off; we need to stay alive and keep working.
+ */
+ atomic_inc(&thread_count);
+ }
+ }
+ remove_wait_queue(&async_new, &wq);
+
+ return 0;
+}
+
+static int async_manager_thread(void *unused)
+{
+ DECLARE_WAITQUEUE(wq, current);
+ add_wait_queue(&async_new, &wq);
+
+ while (!kthread_should_stop()) {
+ int tc, ec;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ tc = atomic_read(&thread_count);
+ rmb();
+ ec = atomic_read(&entry_count);
+
+ while (tc < ec && tc < MAX_THREADS) {
+ kthread_run(async_thread, NULL, "async/%i", tc);
+ atomic_inc(&thread_count);
+ tc++;
+ }
+
+ schedule();
+ }
+ remove_wait_queue(&async_new, &wq);
+
+ return 0;
+}
+
+static int __init async_init(void)
+{
+ kthread_run(async_manager_thread, NULL, "async/mgr");
+ return 0;
+}
+
+core_initcall(async_init);
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index cc0f732..1de9700 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
+#include <linux/async.h>
#include "internals.h"
@@ -34,6 +35,10 @@ unsigned long probe_irq_on(void)
unsigned int status;
int i;
+ /*
+ * quiesce the kernel, or at least the asynchronous portion
+ */
+ async_synchronize_full();
mutex_lock(&probing_active);
/*
* something may have generated an irq long ago and we want to
diff --git a/kernel/module.c b/kernel/module.c
index 496dcb5..c9332c9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -50,6 +50,7 @@
#include <asm/sections.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
+#include <linux/async.h>
#if 0
#define DEBUGP printk
@@ -816,6 +817,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
mod->exit();
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
+ async_synchronize_full();
mutex_lock(&module_mutex);
/* Store the name of the last unloaded module for diagnostic purposes */
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
--
1.6.0.6
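To make the theory-of-operation text above concrete, here is a minimal usage
sketch. Only the async_* calls come from this patch; all my_* names and the
surrounding driver structure are hypothetical:

    #include <linux/async.h>
    #include <linux/init.h>

    struct my_device;                               /* hypothetical */
    extern struct my_device *my_devices[];          /* hypothetical */
    extern int my_device_count;                     /* hypothetical */
    void my_slow_hw_init(struct my_device *dev);    /* hypothetical */
    void my_register_device(struct my_device *dev); /* hypothetical */

    static void my_probe(void *data, async_cookie_t cookie)
    {
            struct my_device *dev = data;

            /* slow part: may run out of order, in parallel with others */
            my_slow_hw_init(dev);

            /*
             * before the globally visible step, wait until everything
             * scheduled before us has finished, so that externally
             * observable ordering is unchanged
             */
            async_synchronize_cookie(cookie);
            my_register_device(dev);
    }

    static int __init my_subsys_init(void)
    {
            int i;

            for (i = 0; i < my_device_count; i++)
                    async_schedule(my_probe, my_devices[i]);

            /*
             * this subsystem shares global resources with non-async code,
             * so fully synchronize before returning from the initcall
             */
            async_synchronize_full();
            return 0;
    }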
From 4ace92fc112c6069b4fcb95a31d3142d4a43ff2a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <[email protected]>
Date: Sun, 4 Jan 2009 05:32:28 -0800
Subject: [PATCH] fastboot: make scsi probes asynchronous
This patch makes part of the scsi probe (which is mostly device spin-up and the
partition scan) asynchronous. Only the part that runs after the device
number has been allocated is asynchronous, ensuring that device numbering remains stable.
Signed-off-by: Arjan van de Ven <[email protected]>
---
drivers/scsi/scsi_scan.c | 3 +
drivers/scsi/sd.c | 109 ++++++++++++++++++++++++++++------------------
2 files changed, 70 insertions(+), 42 deletions(-)
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 18486b5..17914a3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -32,6 +32,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/spinlock.h>
+#include <linux/async.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -179,6 +180,8 @@ int scsi_complete_async_scans(void)
spin_unlock(&async_scan_lock);
kfree(data);
+ /* Synchronize async operations globally */
+ async_synchronize_full();
return 0;
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 62b28d5..e035c11 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -48,6 +48,7 @@
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/string_helpers.h>
+#include <linux/async.h>
#include <asm/uaccess.h>
#include <scsi/scsi.h>
@@ -1802,6 +1803,71 @@ static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen)
return 0;
}
+/*
+ * The asynchronous part of sd_probe
+ */
+static void sd_probe_async(void *data, async_cookie_t cookie)
+{
+ struct scsi_disk *sdkp = data;
+ struct scsi_device *sdp;
+ struct gendisk *gd;
+ u32 index;
+ struct device *dev;
+
+ sdp = sdkp->device;
+ gd = sdkp->disk;
+ index = sdkp->index;
+ dev = &sdp->sdev_gendev;
+
+ if (!sdp->request_queue->rq_timeout) {
+ if (sdp->type != TYPE_MOD)
+ blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
+ else
+ blk_queue_rq_timeout(sdp->request_queue,
+ SD_MOD_TIMEOUT);
+ }
+
+ device_initialize(&sdkp->dev);
+ sdkp->dev.parent = &sdp->sdev_gendev;
+ sdkp->dev.class = &sd_disk_class;
+ strncpy(sdkp->dev.bus_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE);
+
+ if (device_add(&sdkp->dev))
+ goto out_free_index;
+
+ get_device(&sdp->sdev_gendev);
+
+ if (index < SD_MAX_DISKS) {
+ gd->major = sd_major((index & 0xf0) >> 4);
+ gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
+ gd->minors = SD_MINORS;
+ }
+ gd->fops = &sd_fops;
+ gd->private_data = &sdkp->driver;
+ gd->queue = sdkp->device->request_queue;
+
+ sd_revalidate_disk(gd);
+
+ blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
+
+ gd->driverfs_dev = &sdp->sdev_gendev;
+ gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
+ if (sdp->removable)
+ gd->flags |= GENHD_FL_REMOVABLE;
+
+ dev_set_drvdata(dev, sdkp);
+ add_disk(gd);
+ sd_dif_config_host(sdkp);
+
+ sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
+ sdp->removable ? "removable " : "");
+
+ return;
+
+ out_free_index:
+ ida_remove(&sd_index_ida, index);
+}
+
/**
* sd_probe - called during driver initialization and whenever a
* new scsi device is attached to the system. It is called once
@@ -1865,48 +1931,7 @@ static int sd_probe(struct device *dev)
sdkp->openers = 0;
sdkp->previous_state = 1;
- if (!sdp->request_queue->rq_timeout) {
- if (sdp->type != TYPE_MOD)
- blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
- else
- blk_queue_rq_timeout(sdp->request_queue,
- SD_MOD_TIMEOUT);
- }
-
- device_initialize(&sdkp->dev);
- sdkp->dev.parent = &sdp->sdev_gendev;
- sdkp->dev.class = &sd_disk_class;
- strncpy(sdkp->dev.bus_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE);
-
- if (device_add(&sdkp->dev))
- goto out_free_index;
-
- get_device(&sdp->sdev_gendev);
-
- if (index < SD_MAX_DISKS) {
- gd->major = sd_major((index & 0xf0) >> 4);
- gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
- gd->minors = SD_MINORS;
- }
- gd->fops = &sd_fops;
- gd->private_data = &sdkp->driver;
- gd->queue = sdkp->device->request_queue;
-
- sd_revalidate_disk(gd);
-
- blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
-
- gd->driverfs_dev = &sdp->sdev_gendev;
- gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
- if (sdp->removable)
- gd->flags |= GENHD_FL_REMOVABLE;
-
- dev_set_drvdata(dev, sdkp);
- add_disk(gd);
- sd_dif_config_host(sdkp);
-
- sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n",
- sdp->removable ? "removable " : "");
+ async_schedule(sd_probe_async, sdkp);
return 0;
--
1.6.0.6
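The pattern used here can be sketched generically: everything needed for
stable naming happens synchronously, and the slow work is pushed to an async
context once the number is fixed. A hypothetical skeleton (all my_* names are
made up; only async_schedule() is from this series):

    #include <linux/async.h>

    struct my_disk {
            int index;      /* device number, allocated before going async */
    };

    int my_allocate_index(struct my_disk *dk);      /* hypothetical */
    void my_spin_up_and_scan(struct my_disk *dk);   /* hypothetical */

    /* asynchronous half: the slow spin-up and partition scan */
    static void my_probe_async(void *data, async_cookie_t cookie)
    {
            struct my_disk *dk = data;

            my_spin_up_and_scan(dk);
    }

    /* synchronous half: runs in the normal probe path */
    static int my_probe(struct my_disk *dk)
    {
            /* the number is fixed here, so naming stays stable */
            if (my_allocate_index(dk))
                    return -ENODEV;

            async_schedule(my_probe_async, dk);
            return 0;
    }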
From 793180570ff2530d133343ceea85648de5f01b02 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <[email protected]>
Date: Sun, 4 Jan 2009 05:32:28 -0800
Subject: [PATCH] fastboot: make the libata port scan asynchronous
This patch makes the libata port scanning asynchronous (per port).
There is a synchronization point before doing the actual disk scan
so that device ordering is not affected.
Signed-off-by: Arjan van de Ven <[email protected]>
---
drivers/ata/libata-core.c | 84 ++++++++++++++++++++++++--------------------
1 files changed, 46 insertions(+), 38 deletions(-)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index fecca42..7d3ae6a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -56,6 +56,7 @@
#include <linux/workqueue.h>
#include <linux/scatterlist.h>
#include <linux/io.h>
+#include <linux/async.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
@@ -5909,6 +5910,48 @@ void ata_host_init(struct ata_host *host, struct device *dev,
host->ops = ops;
}
+
+static void async_port_probe(void *data, async_cookie_t cookie)
+{
+ int rc;
+ struct ata_port *ap = data;
+ /* probe */
+ if (ap->ops->error_handler) {
+ struct ata_eh_info *ehi = &ap->link.eh_info;
+ unsigned long flags;
+
+ ata_port_probe(ap);
+
+ /* kick EH for boot probing */
+ spin_lock_irqsave(ap->lock, flags);
+
+ ehi->probe_mask |= ATA_ALL_DEVICES;
+ ehi->action |= ATA_EH_RESET | ATA_EH_LPM;
+ ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET;
+
+ ap->pflags &= ~ATA_PFLAG_INITIALIZING;
+ ap->pflags |= ATA_PFLAG_LOADING;
+ ata_port_schedule_eh(ap);
+
+ spin_unlock_irqrestore(ap->lock, flags);
+
+ /* wait for EH to finish */
+ ata_port_wait_eh(ap);
+ } else {
+ DPRINTK("ata%u: bus probe begin\n", ap->print_id);
+ rc = ata_bus_probe(ap);
+ DPRINTK("ata%u: bus probe end\n", ap->print_id);
+
+ if (rc) {
+ /* FIXME: do something useful here?
+ * Current libata behavior will
+ * tear down everything when
+ * the module is removed
+ * or the h/w is unplugged.
+ */
+ }
+ }
+}
/**
* ata_host_register - register initialized ATA host
* @host: ATA host to register
@@ -5988,45 +6031,9 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
DPRINTK("probe begin\n");
for (i = 0; i < host->n_ports; i++) {
struct ata_port *ap = host->ports[i];
-
- /* probe */
- if (ap->ops->error_handler) {
- struct ata_eh_info *ehi = &ap->link.eh_info;
- unsigned long flags;
-
- ata_port_probe(ap);
-
- /* kick EH for boot probing */
- spin_lock_irqsave(ap->lock, flags);
-
- ehi->probe_mask |= ATA_ALL_DEVICES;
- ehi->action |= ATA_EH_RESET | ATA_EH_LPM;
- ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET;
-
- ap->pflags &= ~ATA_PFLAG_INITIALIZING;
- ap->pflags |= ATA_PFLAG_LOADING;
- ata_port_schedule_eh(ap);
-
- spin_unlock_irqrestore(ap->lock, flags);
-
- /* wait for EH to finish */
- ata_port_wait_eh(ap);
- } else {
- DPRINTK("ata%u: bus probe begin\n", ap->print_id);
- rc = ata_bus_probe(ap);
- DPRINTK("ata%u: bus probe end\n", ap->print_id);
-
- if (rc) {
- /* FIXME: do something useful here?
- * Current libata behavior will
- * tear down everything when
- * the module is removed
- * or the h/w is unplugged.
- */
- }
- }
+ async_schedule(async_port_probe, ap);
}
-
+ async_synchronize_full();
/* probes are done, now scan each port's disk(s) */
DPRINTK("host probe begin\n");
for (i = 0; i < host->n_ports; i++) {
@@ -6034,6 +6041,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
ata_scsi_scan_host(ap, 1);
}
+ DPRINTK("host probe end\n");
return 0;
}
--
1.6.0.6
From f29d3b23238e1955a8094e038c72546e99308e61 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <[email protected]>
Date: Mon, 5 Jan 2009 15:07:07 -0800
Subject: [PATCH] fastboot: Make libata initialization even more async
As suggested by Linus: Don't do the libata init in 2 separate
steps with a global sync in between, but do it as one async step,
with a local sync before registering the device.
This cuts the boot time on my machine with 2 SATA controllers down
significantly, and it seems to work. It would be nice if the libata
folks took a good look at this patch, though.
Signed-off-by: Arjan van de Ven <[email protected]>
---
drivers/ata/libata-core.c | 16 +++++++---------
1 files changed, 7 insertions(+), 9 deletions(-)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 7d3ae6a..f178a45 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5951,6 +5951,12 @@ static void async_port_probe(void *data, async_cookie_t cookie)
*/
}
}
+
+ /* in order to keep device order, we need to synchronize at this point */
+ async_synchronize_cookie(cookie);
+
+ ata_scsi_scan_host(ap, 1);
+
}
/**
* ata_host_register - register initialized ATA host
@@ -6033,15 +6039,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
struct ata_port *ap = host->ports[i];
async_schedule(async_port_probe, ap);
}
- async_synchronize_full();
- /* probes are done, now scan each port's disk(s) */
- DPRINTK("host probe begin\n");
- for (i = 0; i < host->n_ports; i++) {
- struct ata_port *ap = host->ports[i];
-
- ata_scsi_scan_host(ap, 1);
- }
- DPRINTK("host probe end\n");
+ DPRINTK("probe end\n");
return 0;
}
--
1.6.0.6
From efaee192063a54749c56b7383803e16fe553630e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <[email protected]>
Date: Tue, 6 Jan 2009 07:20:54 -0800
Subject: [PATCH] async: make the final inode deletion an asynchronous event
this makes "rm -rf" on a (names cached) kernel tree go from
11.6 to 8.6 seconds on an ext3 filesystem
Signed-off-by: Arjan van de Ven <[email protected]>
---
fs/inode.c | 20 +++++++++++++-------
fs/super.c | 10 ++++++++++
include/linux/fs.h | 5 +++++
3 files changed, 28 insertions(+), 7 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 7a6e8c2..0013ac1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
#include <linux/bootmem.h>
#include <linux/inotify.h>
#include <linux/mount.h>
+#include <linux/async.h>
/*
* This is needed for the following functions:
@@ -1138,16 +1139,11 @@ EXPORT_SYMBOL(remove_inode_hash);
* I_FREEING is set so that no-one will take a new reference to the inode while
* it is being deleted.
*/
-void generic_delete_inode(struct inode *inode)
+static void generic_delete_inode_async(void *data, async_cookie_t cookie)
{
+ struct inode *inode = data;
const struct super_operations *op = inode->i_sb->s_op;
- list_del_init(&inode->i_list);
- list_del_init(&inode->i_sb_list);
- inode->i_state |= I_FREEING;
- inodes_stat.nr_inodes--;
- spin_unlock(&inode_lock);
-
security_inode_delete(inode);
if (op->delete_inode) {
@@ -1171,6 +1167,16 @@ void generic_delete_inode(struct inode *inode)
destroy_inode(inode);
}
+void generic_delete_inode(struct inode *inode)
+{
+ list_del_init(&inode->i_list);
+ list_del_init(&inode->i_sb_list);
+ inode->i_state |= I_FREEING;
+ inodes_stat.nr_inodes--;
+ spin_unlock(&inode_lock);
+ async_schedule_special(generic_delete_inode_async, inode, &inode->i_sb->s_async_list);
+}
+
EXPORT_SYMBOL(generic_delete_inode);
static void generic_forget_inode(struct inode *inode)
diff --git a/fs/super.c b/fs/super.c
index ddba069..cb20744 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,6 +38,7 @@
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/file.h>
+#include <linux/async.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -71,6 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
+ INIT_LIST_HEAD(&s->s_async_list);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -289,11 +291,18 @@ void generic_shutdown_super(struct super_block *sb)
{
const struct super_operations *sop = sb->s_op;
+
if (sb->s_root) {
shrink_dcache_for_umount(sb);
fsync_super(sb);
lock_super(sb);
sb->s_flags &= ~MS_ACTIVE;
+
+ /*
+ * wait for asynchronous fs operations to finish before going further
+ */
+ async_synchronize_full_special(&sb->s_async_list);
+
/* bad name - it should be evict_inodes() */
invalidate_inodes(sb);
lock_kernel();
@@ -449,6 +458,7 @@ void sync_filesystems(int wait)
if (sb->s_flags & MS_RDONLY)
continue;
sb->s_need_sync_fs = 1;
+ async_synchronize_full_special(&sb->s_async_list);
}
restart:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7eba77..e38a64d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1184,6 +1184,11 @@ struct super_block {
* generic_show_options()
*/
char *s_options;
+
+ /*
+ * storage for asynchronous operations
+ */
+ struct list_head s_async_list;
};
extern struct timespec current_fs_time(struct super_block *sb);
--
1.6.0.6
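The *_special variants used in this patch take a caller-provided running
list, so a filesystem can wait for just its own asynchronous work instead of
everything in flight. A minimal sketch of that idea (my_* names hypothetical):

    #include <linux/async.h>
    #include <linux/list.h>

    static LIST_HEAD(my_async_list);        /* private running list */

    static void my_delete_op(void *data, async_cookie_t cookie)
    {
            /* slow, independent teardown work for one object */
    }

    static void my_schedule_delete(void *obj)
    {
            async_schedule_special(my_delete_op, obj, &my_async_list);
    }

    static void my_shutdown(void)
    {
            /* waits only for work scheduled on this private list */
            async_synchronize_full_special(&my_async_list);
    }

Note that, as Jonathan Corbet points out further down in this thread, the
run_one_entry() posted here does not yet honor the per-entry running list, so
the *_special synchronization only works as intended once that is fixed.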
From 24b0ecad07ac4d7ef74cb6f7da08c449fa9f6a4f Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <[email protected]>
Date: Sun, 4 Jan 2009 10:59:18 -0800
Subject: [PATCH] bootchart: improve output based on Dave Jones' feedback
Dave Jones, in his blog, had some feedback about the bootchart script:
Primarily his complaint was that shorter delays weren't visualized.
The reason for that was that delays that are too small end up with their labels
mixed together in the graph in an unreadable mess.
This patch has a fix for this; for one, it makes the output wider,
so more will fit.
The second part is that smaller delays are now shown with a
much smaller font for the label; while this isn't per se
readable at a 1:1 zoom, at least you can zoom in with most SVG
viewing applications and see what it is you are looking at.
Signed-off-by: Arjan van de Ven <[email protected]>
---
scripts/bootgraph.pl | 16 +++++++++++-----
1 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/scripts/bootgraph.pl b/scripts/bootgraph.pl
index f0af9aa..0a498e3 100644
--- a/scripts/bootgraph.pl
+++ b/scripts/bootgraph.pl
@@ -88,7 +88,7 @@ END
}
print "<?xml version=\"1.0\" standalone=\"no\"?> \n";
-print "<svg width=\"1000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";
+print "<svg width=\"2000\" height=\"100%\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n";
my @styles;
@@ -105,8 +105,9 @@ $styles[9] = "fill:rgb(255,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0
$styles[10] = "fill:rgb(255,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
$styles[11] = "fill:rgb(128,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)";
-my $mult = 950.0 / ($maxtime - $firsttime);
-my $threshold = ($maxtime - $firsttime) / 60.0;
+my $mult = 1950.0 / ($maxtime - $firsttime);
+my $threshold2 = ($maxtime - $firsttime) / 120.0;
+my $threshold = $threshold2/10;
my $stylecounter = 0;
my %rows;
my $rowscount = 1;
@@ -116,7 +117,7 @@ foreach my $key (@initcalls) {
my $duration = $end{$key} - $start{$key};
if ($duration >= $threshold) {
- my ($s, $s2, $e, $w, $y, $y2, $style);
+ my ($s, $s2, $s3, $e, $w, $y, $y2, $style);
my $pid = $pids{$key};
if (!defined($rows{$pid})) {
@@ -125,6 +126,7 @@ foreach my $key (@initcalls) {
}
$s = ($start{$key} - $firsttime) * $mult;
$s2 = $s + 6;
+ $s3 = $s + 1;
$e = ($end{$key} - $firsttime) * $mult;
$w = $e - $s;
@@ -138,7 +140,11 @@ foreach my $key (@initcalls) {
};
print "<rect x=\"$s\" width=\"$w\" y=\"$y\" height=\"145\" style=\"$style\"/>\n";
- print "<text transform=\"translate($s2,$y2) rotate(90)\">$key</text>\n";
+ if ($duration >= $threshold2) {
+ print "<text transform=\"translate($s2,$y2) rotate(90)\">$key</text>\n";
+ } else {
+ print "<text transform=\"translate($s3,$y2) rotate(90)\" font-size=\"3pt\">$key</text>\n";
+ }
}
}
--
1.6.0.6
From ad160d23198193135cb2bcc75222e0816b5838c0 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <[email protected]>
Date: Wed, 7 Jan 2009 09:28:53 -0800
Subject: [PATCH] async: don't do the initcall stuff post boot
While tracking the asynchronous calls during boot using the initcall_debug
convention is useful, doing it once the kernel has finished booting is actually
bad now that we use asynchronous operations post boot as well...
Signed-off-by: Arjan van de Ven <[email protected]>
---
kernel/async.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/async.c b/kernel/async.c
index afaa8a6..9737338 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -124,12 +124,12 @@ static void run_one_entry(void)
spin_unlock_irqrestore(&async_lock, flags);
/* 3) run it (and print duration)*/
- if (initcall_debug) {
+ if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
calltime = ktime_get();
}
entry->func(entry->data, entry->cookie);
- if (initcall_debug) {
+ if (initcall_debug && system_state == SYSTEM_BOOTING) {
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
@@ -220,14 +220,14 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r
{
ktime_t starttime, delta, endtime;
- if (initcall_debug) {
+ if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk("async_waiting @ %i\n", task_pid_nr(current));
starttime = ktime_get();
}
wait_event(async_done, __lowest_in_progress(running) >= cookie);
- if (initcall_debug) {
+ if (initcall_debug && system_state == SYSTEM_BOOTING) {
endtime = ktime_get();
delta = ktime_sub(endtime, starttime);
--
1.6.0.6
On Wed, 7 Jan 2009, Arjan van de Ven wrote:
>
> version 3 of the async function call patches
>
> * Dropped the ACPI part; it broke in surprising ways and needs a rethink
> (working with Len and co on that)
> * Included asynchronous delete()
Ok, I pulled this, because I really do want the boot speedups and the
previous version missed the last merge window, but after booting it, I
started to worry:
My dmesg shows:
[ 2.264955] sd 5:0:0:0: [sdb] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[ 2.264958] sdb:<6>Freeing unused kernel memory: 408k freed
Ouch. How come that "Freeing unused kernel memory" got done in the middle
of the sdb partition thing?
There's an async_synchronize_full() there before the free_initmem(), but
I'm worrying that it just isn't working. Hmm? What am I missing?
Linus
On Wed, Jan 07, 2009 at 03:12:26PM -0800, Arjan van de Ven wrote:
<SNIP>
> +/*
> + * MUST be called with the lock held!
> + */
> +static async_cookie_t __lowest_in_progress(struct list_head *running)
> +{
> + struct async_entry *entry;
> + if (!list_empty(&async_pending)) {
> + entry = list_first_entry(&async_pending,
> + struct async_entry, list);
Small nitpick:
- > + return entry->cookie;
> + } else if (!list_empty(running)) {
> + entry = list_first_entry(running,
> + struct async_entry, list);
- > + return entry->cookie;
> + } else {
> + /* nothing in progress... next_cookie is "infinity" */
> + return next_cookie;
> + }
+ return entry->cookie;
> +/*
> + * pick the first pending entry and run it
> + */
> +static void run_one_entry(void)
> +{
> + unsigned long flags;
> + struct async_entry *entry;
> + ktime_t calltime, delta, rettime;
> +
> + /* 1) pick one task from the pending queue */
> +
> + spin_lock_irqsave(&async_lock, flags);
> + if (list_empty(&async_pending))
> + goto out;
> + entry = list_first_entry(&async_pending, struct async_entry, list);
> +
> + /* 2) move it to the running queue */
> + list_del(&entry->list);
> + list_add_tail(&entry->list, &async_running);
We have list_move_tail()
> + spin_unlock_irqrestore(&async_lock, flags);
> +
> + /* 3) run it (and print duration)*/
> + if (initcall_debug) {
> + printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
> + calltime = ktime_get();
- Arnaldo
On Wed, 7 Jan 2009 22:31:41 -0200
Arnaldo Carvalho de Melo <[email protected]> wrote:
> Small nitpick:
>
> - > + return entry->cookie;
> > + } else if (!list_empty(running)) {
> > + entry = list_first_entry(running,
> > + struct async_entry, list);
> - > + return entry->cookie;
> > + } else {
> > + /* nothing in progress... next_cookie is
> > "infinity" */
> > + return next_cookie;
> > + }
>
> + return entry->cookie;
No, you cannot do this; by that point the entry can already have been
freed by a worker thread.
On Wed, 7 Jan 2009 16:17:24 -0800 (PST)
Linus Torvalds <[email protected]> wrote:
>
>
> On Wed, 7 Jan 2009, Arjan van de Ven wrote:
> >
> > version 3 of the async function call patches
> >
> > * Dropped the ACPI part; it broke in surprising ways and needs a rethink
> > (working with Len and co on that)
> > * Included asynchronous delete()
>
> Ok, I pulled this, because I really do want the boot speedups and the
> previous version missed the last merge window, but after booting it,
> I started to worry:
>
> My dmesg shows:
>
> [ 2.264955] sd 5:0:0:0: [sdb] Write cache: enabled, read cache:
> enabled, doesn't support DPO or FUA [ 2.264958] sdb:<6>Freeing
> unused kernel memory: 408k freed
>
> Ouch. How come that "Freeing unused kernel memory" got done in the
> middle of the sdb partition thing?
>
> There's an async_synchronize_full() there before the free_initmem(),
> but I'm worrying that it just isn't working. Hmm? What am I missing?
>
Ok, this part looks funny but it's not really (and it's safe, I think).
The async sata thing launches another async thing (the scsi partition
scan).
The async_synchronize_full() waits for the sata probe to complete, but doesn't
wait for things that the sata async item schedules after the wait started.
Is this a problem? Not right now, but it means we have a rule that if
an async item schedules another async item, the second one cannot be
__init (which is ok right now.. scsi already had some of this async
anyway).
I could make async_synchronize_full() more strict if that makes you feel
better, but for this specific purpose it would be over-synchronizing.
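In sketch form, the scenario being described (names hypothetical):

    #include <linux/async.h>

    /* second-level async item, scheduled from inside the first one */
    static void partition_scan(void *data, async_cookie_t cookie)
    {
            /* ... */
    }

    static void sata_probe(void *data, async_cookie_t cookie)
    {
            /* ... port probe ... */
            async_schedule(partition_scan, data);
    }

    static int __init boot_path(void)
    {
            void *host = NULL;      /* hypothetical handle */

            async_schedule(sata_probe, host);

            /*
             * waits for sata_probe() itself, but not necessarily for
             * partition_scan(), which sata_probe() may schedule after
             * this wait has started -- hence the interleaved dmesg
             */
            async_synchronize_full();
            return 0;
    }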
--
Arjan van de Ven -- Intel Open Source Technology Centre
For development, discussion and tips for power savings,
visit http://www.lesswatts.org
I just wanted to say that because of these patches the kernel boots
faster than some hard drives can spin up/init... I have these patches
included in a patchset I maintain, and some users now need rootwait on the
kernel command line :D
-Ryan
On 1/7/09, Arjan van de Ven <[email protected]> wrote:
> version 3 of the async function call patches
>
> * Dropped the ACPI part; it broke in surprising ways and needs a rethink
> (working with Len and co on that)
> * Included asynchronous delete()
<SNIP>
[A somewhat belated question...]
As I read the patch, I find the async_entry structure:
> +struct async_entry {
> + struct list_head list;
> + async_cookie_t cookie;
> + async_func_ptr *func;
> + void *data;
> + struct list_head *running;
> +};
The "running" field is, presumably, meant to hold a pointer to the
"running" queue to be used when this function is actually run. But, then,
I see:
> +async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
> +{
> + return __async_schedule(ptr, data, &async_pending);
> +}
> +EXPORT_SYMBOL_GPL(async_schedule);
It seems to me that you wanted &async_running there, no?
However, it doesn't matter in the current form of the patch:
> +/*
> + * pick the first pending entry and run it
> + */
> +static void run_one_entry(void)
> +{
> + unsigned long flags;
> + struct async_entry *entry;
> + ktime_t calltime, delta, rettime;
> +
> + /* 1) pick one task from the pending queue */
> +
> + spin_lock_irqsave(&async_lock, flags);
> + if (list_empty(&async_pending))
> + goto out;
> + entry = list_first_entry(&async_pending, struct async_entry, list);
> +
> + /* 2) move it to the running queue */
> + list_del(&entry->list);
> + list_add_tail(&entry->list, &async_running);
> + spin_unlock_irqrestore(&async_lock, flags);
Given the way things are designed, don't you want to add the entry to
entry->running, rather than unconditionally to async_running? If not, I
don't see how calls to async_synchronize_cookie_special() can work right.
Of course, I'm probably just confused...enlighten me?
Thanks,
jon
On Tue, 13 Jan 2009 13:48:59 -0700,
Jonathan Corbet <[email protected]> wrote:
> [A somewhat belated question...]
>
> As I read the patch, I find the async_entry structure:
>
> > +struct async_entry {
> > + struct list_head list;
> > + async_cookie_t cookie;
> > + async_func_ptr *func;
> > + void *data;
> > + struct list_head *running;
> > +};
>
> The "running" field is, presumably, meant to hold a pointer to the
> "running" queue to be used when this function is actually run. But, then,
> I see:
>
> > +async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
> > +{
> > + return __async_schedule(ptr, data, &async_pending);
> > +}
> > +EXPORT_SYMBOL_GPL(async_schedule);
>
> It seems to me that you wanted &async_running there, no?
>
> However, it doesn't matter in the current form of the patch:
>
> > +/*
> > + * pick the first pending entry and run it
> > + */
> > +static void run_one_entry(void)
> > +{
> > + unsigned long flags;
> > + struct async_entry *entry;
> > + ktime_t calltime, delta, rettime;
> > +
> > + /* 1) pick one task from the pending queue */
> > +
> > + spin_lock_irqsave(&async_lock, flags);
> > + if (list_empty(&async_pending))
> > + goto out;
> > + entry = list_first_entry(&async_pending, struct async_entry, list);
> > +
> > + /* 2) move it to the running queue */
> > + list_del(&entry->list);
> > + list_add_tail(&entry->list, &async_running);
> > + spin_unlock_irqrestore(&async_lock, flags);
>
> Given the way things are designed, don't you want to add the entry to
> entry->running, rather than unconditionally to async_running? If not, I
> don't see how calls to async_synchronize_cookie_special() can work right.
>
> Of course, I'm probably just confused...enlighten me?
No, you're not confused, the code is :)
async_schedule() should pass in async_running as the running
list, and run_one_entry() should put the entry to be run on
the provided running list instead of always on the generic one.
Reported-by: Jonathan Corbet <[email protected]>
Signed-off-by: Cornelia Huck <[email protected]>
---
kernel/async.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- linux-2.6.orig/kernel/async.c
+++ linux-2.6/kernel/async.c
@@ -133,7 +133,7 @@ static void run_one_entry(void)
/* 2) move it to the running queue */
list_del(&entry->list);
- list_add_tail(&entry->list, &async_running);
+ list_add_tail(&entry->list, entry->running);
spin_unlock_irqrestore(&async_lock, flags);
/* 3) run it (and print duration)*/
@@ -207,7 +207,7 @@ static async_cookie_t __async_schedule(a
async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
{
- return __async_schedule(ptr, data, &async_pending);
+ return __async_schedule(ptr, data, &async_running);
}
EXPORT_SYMBOL_GPL(async_schedule);
> > My dmesg shows:
> >
> > [ 2.264955] sd 5:0:0:0: [sdb] Write cache: enabled, read cache:
> > enabled, doesn't support DPO or FUA [ 2.264958] sdb:<6>Freeing
> > unused kernel memory: 408k freed
> >
> > Ouch. How come that "Freeing unused kernel memory" got done in the
> > middle of the sdb partition thing?
> >
> > There's an async_synchronize_full() there before the free_initmem(),
> > but I'm worrying that it just isn't working. Hmm? What am I missing?
> >
>
> Ok, this part looks funny but it's not really (and it's safe, I think).
>
> The async sata thing launches another async thing (the scsi partition
> scan).
> The async_synchronize_full() waits for the sata probe to complete, but doesn't
> wait for things that the sata async item schedules after the wait started.
>
> Is this a problem? Not right now, but it means we have a rule that if
> an async item schedules another async item, the second one cannot be
> __init (which is ok right now.. scsi already had some of this async
> anyway).
>
> I could make async_synchronize_full() more strict if that makes you feel
> better, but for this specific purpose it would be over-synchronizing.
Really? If you run userspace before partitions are ready, its attempts
to read partitions will result in problems, right?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html