While trying to use blktrace in -tip tree, I encounted kernel NULL
dereference, so I looked into the code, and then found some other
bugs.
This patchset is part 1, and I have some other pending fixes.
Each patch is small and straightforward, and should be easy to review:
[PATCH 1/7] blktrace: fix possible memory leak
[PATCH 2/7] blktrace: make blk_tracer_enabled a bool flag
[PATCH 3/7] blktrace: remove blk_probe_mutex
[PATCH 4/7] blktrace: don't increase blk_probes_ref if failed to setup blk trace
[PATCH 5/7] blktrace: report EBUSY correctly
[PATCH 6/7] blktrace: remove sysfs_blk_trace_enable_show/store()
[PATCH 7/7] blktrace: avoid accessing NULL bdev->bd_disk
---
blktrace.c | 154 +++++++++++++++++--------------------------------------------
1 file changed, 45 insertions(+), 109 deletions(-)
Signed-off-by: Li Zefan <[email protected]>
When we failed to create "block" debugfs dir, we should do come cleanups.
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b171778..fb3bc53 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -432,7 +432,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!blk_tree_root) {
blk_tree_root = debugfs_create_dir("block", NULL);
if (!blk_tree_root)
- return -ENOMEM;
+ goto err;
}
dir = debugfs_create_dir(buts->name, blk_tree_root);
--
1.5.4.rc3
It doesn't have to be a counter, and it can be a bool flag instead.
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 12 +++---------
1 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fb3bc53..73845b7 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -30,7 +30,7 @@
static unsigned int blktrace_seq __read_mostly = 1;
static struct trace_array *blk_tr;
-static int __read_mostly blk_tracer_enabled;
+static bool blk_tracer_enabled __read_mostly;
/* Select an alternative, minimalistic output than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -1111,9 +1111,7 @@ static int blk_tracer_init(struct trace_array *tr)
{
blk_tr = tr;
blk_tracer_start(tr);
- mutex_lock(&blk_probe_mutex);
- blk_tracer_enabled++;
- mutex_unlock(&blk_probe_mutex);
+ blk_tracer_enabled = true;
return 0;
}
@@ -1131,11 +1129,7 @@ static void blk_tracer_reset(struct trace_array *tr)
if (!atomic_read(&blk_probes_ref))
return;
- mutex_lock(&blk_probe_mutex);
- blk_tracer_enabled--;
- WARN_ON(blk_tracer_enabled < 0);
- mutex_unlock(&blk_probe_mutex);
-
+ blk_tracer_enabled = false;
blk_tracer_stop(tr);
}
--
1.5.4.rc3
do_blk_trace_setup() may return EBUSY, but the current code doesn't
decrease blk_probes_ref in this case.
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 223b92e..11e7c8d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -468,9 +468,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- if (atomic_add_return(1, &blk_probes_ref) == 1)
- blk_register_tracepoints();
-
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
if (old_bt) {
@@ -478,6 +475,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
goto err;
}
+ if (atomic_add_return(1, &blk_probes_ref) == 1)
+ blk_register_tracepoints();
+
return 0;
err:
if (bt) {
--
1.5.4.rc3
sysfs_blk_trace_enable_show()/store() share most of code with
sysfs_blk_trace_attr_show()/store().
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 89 +++++++++++------------------------------------
1 files changed, 21 insertions(+), 68 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 14986af..46e0cfe 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1284,72 +1284,6 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
* sysfs interface to enable and configure tracing
*/
-static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct block_device *bdev;
- ssize_t ret = -ENXIO;
-
- lock_kernel();
- bdev = bdget(part_devt(p));
- if (bdev != NULL) {
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (q != NULL) {
- mutex_lock(&bdev->bd_mutex);
- ret = sprintf(buf, "%u\n", !!q->blk_trace);
- mutex_unlock(&bdev->bd_mutex);
- }
-
- bdput(bdev);
- }
-
- unlock_kernel();
- return ret;
-}
-
-static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct block_device *bdev;
- struct request_queue *q;
- struct hd_struct *p;
- int value;
- ssize_t ret = -ENXIO;
-
- if (count == 0 || sscanf(buf, "%d", &value) != 1)
- goto out;
-
- lock_kernel();
- p = dev_to_part(dev);
- bdev = bdget(part_devt(p));
- if (bdev == NULL)
- goto out_unlock_kernel;
-
- q = bdev_get_queue(bdev);
- if (q == NULL)
- goto out_bdput;
-
- mutex_lock(&bdev->bd_mutex);
- if (value)
- ret = blk_trace_setup_queue(q, bdev->bd_dev);
- else
- ret = blk_trace_remove_queue(q);
- mutex_unlock(&bdev->bd_mutex);
-
- if (ret == 0)
- ret = count;
-out_bdput:
- bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
-out:
- return ret;
-}
-
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf);
@@ -1361,8 +1295,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
sysfs_blk_trace_attr_show, \
sysfs_blk_trace_attr_store)
-static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
- sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
+static BLK_TRACE_DEVICE_ATTR(enable);
static BLK_TRACE_DEVICE_ATTR(act_mask);
static BLK_TRACE_DEVICE_ATTR(pid);
static BLK_TRACE_DEVICE_ATTR(start_lba);
@@ -1447,6 +1380,12 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (q == NULL)
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ goto out_unlock_bdev;
+ }
+
if (q->blk_trace == NULL)
ret = sprintf(buf, "disabled\n");
else if (attr == &dev_attr_act_mask)
@@ -1457,6 +1396,8 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
else if (attr == &dev_attr_end_lba)
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
@@ -1499,6 +1440,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ if (value)
+ ret = blk_trace_setup_queue(q, bdev->bd_dev);
+ else
+ ret = blk_trace_remove_queue(q);
+ goto out_unlock_bdev;
+ }
+
ret = 0;
if (q->blk_trace == NULL)
ret = blk_trace_setup_queue(q, bdev->bd_dev);
@@ -1514,6 +1464,8 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
q->blk_trace->end_lba = value;
ret = count;
}
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
@@ -1522,3 +1474,4 @@ out_unlock_kernel:
out:
return ret;
}
+
--
1.5.4.rc3
blk_trace_remove_queue() returns EINVAL if q->blk_trace == NULL,
but blk_trace_setup_queue() doesn't return EBUSY if q->blk_trace != NULL.
# echo 0 > sdaX/trace/enable
# echo 0 > sdaX/trace/enable
bash: echo: write error: Invalid argument
# echo 1 > sdaX/trace/enable
# echo 1 > sdaX/trace/enable
(should return EBUSY)
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 9 +++------
1 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 11e7c8d..14986af 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1260,12 +1260,10 @@ static int blk_trace_remove_queue(struct request_queue *q)
static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
{
struct blk_trace *old_bt, *bt = NULL;
- int ret;
- ret = -ENOMEM;
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
- goto err;
+ return -ENOMEM;
bt->dev = dev;
bt->act_mask = (u16)-1;
@@ -1276,11 +1274,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
if (old_bt != NULL) {
(void)xchg(&q->blk_trace, old_bt);
kfree(bt);
- ret = -EBUSY;
+ return -EBUSY;
}
+
return 0;
-err:
- return ret;
}
/*
--
1.5.4.rc3
bdev->bd_disk can be NULL, if the block device is not opened.
Try this against an unmounted partition, and you'll see NULL dereference:
# echo 1 > /sys/block/sda/sda5/enable
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 13 +++++++++++--
1 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 46e0cfe..37095d9 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1362,6 +1362,14 @@ static int blk_str2act_mask(const char *str)
return mask;
}
+static request_queue *blk_trace_get_queue(struct block_device *bdev)
+{
+ if (bdev->bd_disk == NULL)
+ return NULL;
+
+ return bdev_get_queue(bdev);
+}
+
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1376,9 +1384,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
+
mutex_lock(&bdev->bd_mutex);
if (attr == &dev_attr_enable) {
@@ -1435,7 +1444,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
--
1.5.4.rc3
blk_register_tracepoints() always returns 0, so make it return void, thus
we don't need to use blk_probe_mutex to protect blk_probes_ref.
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 27 +++++----------------------
1 files changed, 5 insertions(+), 22 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 73845b7..223b92e 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = {
};
/* Global reference count of probes */
-static DEFINE_MUTEX(blk_probe_mutex);
static atomic_t blk_probes_ref = ATOMIC_INIT(0);
-static int blk_register_tracepoints(void);
+static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);
/*
@@ -256,10 +255,8 @@ static void blk_trace_cleanup(struct blk_trace *bt)
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
- mutex_lock(&blk_probe_mutex);
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
}
int blk_trace_remove(struct request_queue *q)
@@ -471,13 +468,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- mutex_lock(&blk_probe_mutex);
- if (atomic_add_return(1, &blk_probes_ref) == 1) {
- ret = blk_register_tracepoints();
- if (ret)
- goto probe_err;
- }
- mutex_unlock(&blk_probe_mutex);
+ if (atomic_add_return(1, &blk_probes_ref) == 1)
+ blk_register_tracepoints();
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
@@ -487,9 +479,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
}
return 0;
-probe_err:
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
err:
if (bt) {
if (bt->msg_file)
@@ -863,7 +852,7 @@ void blk_add_driver_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
-static int blk_register_tracepoints(void)
+static void blk_register_tracepoints(void)
{
int ret;
@@ -901,7 +890,6 @@ static int blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_remap(blk_add_trace_remap);
WARN_ON(ret);
- return 0;
}
static void blk_unregister_tracepoints(void)
@@ -1099,11 +1087,8 @@ static void blk_tracer_print_header(struct seq_file *m)
static void blk_tracer_start(struct trace_array *tr)
{
- mutex_lock(&blk_probe_mutex);
if (atomic_add_return(1, &blk_probes_ref) == 1)
- if (blk_register_tracepoints())
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
+ blk_register_tracepoints();
trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
}
@@ -1118,10 +1103,8 @@ static int blk_tracer_init(struct trace_array *tr)
static void blk_tracer_stop(struct trace_array *tr)
{
trace_flags |= TRACE_ITER_CONTEXT_INFO;
- mutex_lock(&blk_probe_mutex);
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
}
static void blk_tracer_reset(struct trace_array *tr)
--
1.5.4.rc3
> +static request_queue *blk_trace_get_queue(struct block_device *bdev)
> +{
Sorry, should be "struct request_queue", I forgot to re-format the patch
after I fixed this.
================
From: Li Zefan <[email protected]>
Subject: [PATCH 7/7] blktrace: avoid accessing NULL bdev->bd_disk
bdev->bd_disk can be NULL, if the block device is not opened.
Try this against an unmounted partition, and you'll see NULL dereference:
# echo 1 > /sys/block/sda/sda5/enable
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 13 +++++++++++--
1 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 46e0cfe..d2dd9ed 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1362,6 +1362,14 @@ static int blk_str2act_mask(const char *str)
return mask;
}
+static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
+{
+ if (bdev->bd_disk == NULL)
+ return NULL;
+
+ return bdev_get_queue(bdev);
+}
+
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1376,9 +1384,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
+
mutex_lock(&bdev->bd_mutex);
if (attr == &dev_attr_enable) {
@@ -1435,7 +1444,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
--
1.5.4.rc3
> + if (attr == &dev_attr_enable) {
> + if (value)
> + ret = blk_trace_setup_queue(q, bdev->bd_dev);
> + else
> + ret = blk_trace_remove_queue(q);
(ret = count) should be returned if ret == 0.. patch updated below
> + goto out_unlock_bdev;
> + }
===================
From: Li Zefan <[email protected]>
Subject: [PATCH 6/7] blktrace: sysfs_blk_trace_enable_show/store()
sysfs_blk_trace_enable_show()/store() share most of code with
sysfs_blk_trace_attr_show()/store().
Signed-off-by: Li Zefan <[email protected]>
---
kernel/trace/blktrace.c | 92 +++++++++++-----------------------------------
1 files changed, 22 insertions(+), 70 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 14986af..dfee6f9 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1284,72 +1284,6 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
* sysfs interface to enable and configure tracing
*/
-static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct block_device *bdev;
- ssize_t ret = -ENXIO;
-
- lock_kernel();
- bdev = bdget(part_devt(p));
- if (bdev != NULL) {
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (q != NULL) {
- mutex_lock(&bdev->bd_mutex);
- ret = sprintf(buf, "%u\n", !!q->blk_trace);
- mutex_unlock(&bdev->bd_mutex);
- }
-
- bdput(bdev);
- }
-
- unlock_kernel();
- return ret;
-}
-
-static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct block_device *bdev;
- struct request_queue *q;
- struct hd_struct *p;
- int value;
- ssize_t ret = -ENXIO;
-
- if (count == 0 || sscanf(buf, "%d", &value) != 1)
- goto out;
-
- lock_kernel();
- p = dev_to_part(dev);
- bdev = bdget(part_devt(p));
- if (bdev == NULL)
- goto out_unlock_kernel;
-
- q = bdev_get_queue(bdev);
- if (q == NULL)
- goto out_bdput;
-
- mutex_lock(&bdev->bd_mutex);
- if (value)
- ret = blk_trace_setup_queue(q, bdev->bd_dev);
- else
- ret = blk_trace_remove_queue(q);
- mutex_unlock(&bdev->bd_mutex);
-
- if (ret == 0)
- ret = count;
-out_bdput:
- bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
-out:
- return ret;
-}
-
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf);
@@ -1361,8 +1295,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
sysfs_blk_trace_attr_show, \
sysfs_blk_trace_attr_store)
-static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
- sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
+static BLK_TRACE_DEVICE_ATTR(enable);
static BLK_TRACE_DEVICE_ATTR(act_mask);
static BLK_TRACE_DEVICE_ATTR(pid);
static BLK_TRACE_DEVICE_ATTR(start_lba);
@@ -1447,6 +1380,12 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (q == NULL)
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ goto out_unlock_bdev;
+ }
+
if (q->blk_trace == NULL)
ret = sprintf(buf, "disabled\n");
else if (attr == &dev_attr_act_mask)
@@ -1457,6 +1396,8 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
else if (attr == &dev_attr_end_lba)
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
@@ -1499,6 +1440,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ if (value)
+ ret = blk_trace_setup_queue(q, bdev->bd_dev);
+ else
+ ret = blk_trace_remove_queue(q);
+ goto out_unlock_bdev;
+ }
+
ret = 0;
if (q->blk_trace == NULL)
ret = blk_trace_setup_queue(q, bdev->bd_dev);
@@ -1512,13 +1462,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
q->blk_trace->start_lba = value;
else if (attr == &dev_attr_end_lba)
q->blk_trace->end_lba = value;
- ret = count;
}
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
out_unlock_kernel:
unlock_kernel();
out:
- return ret;
+ return ret ? ret : count;
}
+
--
1.5.4.rc3
On Fri, Mar 20, 2009 at 09:48:03AM +0800, Li Zefan wrote:
> It doesn't have to be a counter, and it can be a bool flag instead.
>
> Signed-off-by: Li Zefan <[email protected]>
> ---
> kernel/trace/blktrace.c | 12 +++---------
> 1 files changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index fb3bc53..73845b7 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -30,7 +30,7 @@
> static unsigned int blktrace_seq __read_mostly = 1;
>
> static struct trace_array *blk_tr;
> -static int __read_mostly blk_tracer_enabled;
> +static bool blk_tracer_enabled __read_mostly;
>
> /* Select an alternative, minimalistic output than the original one */
> #define TRACE_BLK_OPT_CLASSIC 0x1
> @@ -1111,9 +1111,7 @@ static int blk_tracer_init(struct trace_array *tr)
> {
> blk_tr = tr;
> blk_tracer_start(tr);
> - mutex_lock(&blk_probe_mutex);
> - blk_tracer_enabled++;
> - mutex_unlock(&blk_probe_mutex);
> + blk_tracer_enabled = true;
> return 0;
> }
>
> @@ -1131,11 +1129,7 @@ static void blk_tracer_reset(struct trace_array *tr)
> if (!atomic_read(&blk_probes_ref))
> return;
>
> - mutex_lock(&blk_probe_mutex);
> - blk_tracer_enabled--;
> - WARN_ON(blk_tracer_enabled < 0);
> - mutex_unlock(&blk_probe_mutex);
> -
> + blk_tracer_enabled = false;
> blk_tracer_stop(tr);
> }
>
> --
> 1.5.4.rc3
Looks good.
Indeed the init() and reset() callbacks are not supposed to be called nested.
On Fri, Mar 20, 2009 at 09:48:26AM +0800, Li Zefan wrote:
> blk_register_tracepoints() always returns 0, so make it return void, thus
> we don't need to use blk_probe_mutex to protect blk_probes_ref.
>
> Signed-off-by: Li Zefan <[email protected]>
> ---
> kernel/trace/blktrace.c | 27 +++++----------------------
> 1 files changed, 5 insertions(+), 22 deletions(-)
>
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index 73845b7..223b92e 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = {
> };
>
> /* Global reference count of probes */
> -static DEFINE_MUTEX(blk_probe_mutex);
> static atomic_t blk_probes_ref = ATOMIC_INIT(0);
>
> -static int blk_register_tracepoints(void);
> +static void blk_register_tracepoints(void);
> static void blk_unregister_tracepoints(void);
>
> /*
> @@ -256,10 +255,8 @@ static void blk_trace_cleanup(struct blk_trace *bt)
> free_percpu(bt->sequence);
> free_percpu(bt->msg_data);
> kfree(bt);
> - mutex_lock(&blk_probe_mutex);
> if (atomic_dec_and_test(&blk_probes_ref))
> blk_unregister_tracepoints();
> - mutex_unlock(&blk_probe_mutex);
> }
>
> int blk_trace_remove(struct request_queue *q)
> @@ -471,13 +468,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
> bt->pid = buts->pid;
> bt->trace_state = Blktrace_setup;
>
> - mutex_lock(&blk_probe_mutex);
> - if (atomic_add_return(1, &blk_probes_ref) == 1) {
> - ret = blk_register_tracepoints();
> - if (ret)
> - goto probe_err;
> - }
> - mutex_unlock(&blk_probe_mutex);
> + if (atomic_add_return(1, &blk_probes_ref) == 1)
atomic_inc_return() is a bit more simple.
> + blk_register_tracepoints();
>
> ret = -EBUSY;
> old_bt = xchg(&q->blk_trace, bt);
> @@ -487,9 +479,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
> }
>
> return 0;
> -probe_err:
> - atomic_dec(&blk_probes_ref);
> - mutex_unlock(&blk_probe_mutex);
> err:
> if (bt) {
> if (bt->msg_file)
> @@ -863,7 +852,7 @@ void blk_add_driver_data(struct request_queue *q,
> }
> EXPORT_SYMBOL_GPL(blk_add_driver_data);
>
> -static int blk_register_tracepoints(void)
> +static void blk_register_tracepoints(void)
> {
> int ret;
>
> @@ -901,7 +890,6 @@ static int blk_register_tracepoints(void)
> WARN_ON(ret);
> ret = register_trace_block_remap(blk_add_trace_remap);
> WARN_ON(ret);
> - return 0;
> }
>
> static void blk_unregister_tracepoints(void)
> @@ -1099,11 +1087,8 @@ static void blk_tracer_print_header(struct seq_file *m)
>
> static void blk_tracer_start(struct trace_array *tr)
> {
> - mutex_lock(&blk_probe_mutex);
> if (atomic_add_return(1, &blk_probes_ref) == 1)
> - if (blk_register_tracepoints())
> - atomic_dec(&blk_probes_ref);
> - mutex_unlock(&blk_probe_mutex);
> + blk_register_tracepoints();
> trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
> }
>
> @@ -1118,10 +1103,8 @@ static int blk_tracer_init(struct trace_array *tr)
> static void blk_tracer_stop(struct trace_array *tr)
> {
> trace_flags |= TRACE_ITER_CONTEXT_INFO;
> - mutex_lock(&blk_probe_mutex);
> if (atomic_dec_and_test(&blk_probes_ref))
> blk_unregister_tracepoints();
> - mutex_unlock(&blk_probe_mutex);
> }
>
> static void blk_tracer_reset(struct trace_array *tr)
> --
> 1.5.4.rc3
>
Looks good.
* Li Zefan <[email protected]> wrote:
> While trying to use blktrace in -tip tree, I encounted kernel NULL
> dereference, so I looked into the code, and then found some other
> bugs.
>
> This patchset is part 1, and I have some other pending fixes.
>
> Each patch is small and straightforward, and should be easy to review:
>
> [PATCH 1/7] blktrace: fix possible memory leak
> [PATCH 2/7] blktrace: make blk_tracer_enabled a bool flag
> [PATCH 3/7] blktrace: remove blk_probe_mutex
> [PATCH 4/7] blktrace: don't increase blk_probes_ref if failed to setup blk trace
> [PATCH 5/7] blktrace: report EBUSY correctly
> [PATCH 6/7] blktrace: remove sysfs_blk_trace_enable_show/store()
> [PATCH 7/7] blktrace: avoid accessing NULL bdev->bd_disk
> ---
> blktrace.c | 154 +++++++++++++++++--------------------------------------------
> 1 file changed, 45 insertions(+), 109 deletions(-)
>
> Signed-off-by: Li Zefan <[email protected]>
Nice fixes. Applied to tip:tracing/blktrace, thanks!
Ingo
Commit-ID: 86d7c7acc6b5ec500f319120f7e72ea62fb6252d
Gitweb: http://git.kernel.org/tip/86d7c7acc6b5ec500f319120f7e72ea62fb6252d
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:47:30 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 20 Mar 2009 11:20:04 +0100
blktrace: fix possible memory leak
When we failed to create "block" debugfs dir, we should do some
cleanups.
Signed-off-by: Li Zefan <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Steven Rostedt <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b171778..fb3bc53 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -432,7 +432,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!blk_tree_root) {
blk_tree_root = debugfs_create_dir("block", NULL);
if (!blk_tree_root)
- return -ENOMEM;
+ goto err;
}
dir = debugfs_create_dir(buts->name, blk_tree_root);
Commit-ID: a2df44c805026d40fae556b04f9addbce486bbe2
Gitweb: http://git.kernel.org/tip/a2df44c805026d40fae556b04f9addbce486bbe2
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:48:26 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 20 Mar 2009 11:20:05 +0100
blktrace: remove blk_probe_mutex
blk_register_tracepoints() always returns 0, so make it return void,
thus we don't need to use blk_probe_mutex to protect blk_probes_ref.
Signed-off-by: Li Zefan <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Steven Rostedt <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 27 +++++----------------------
1 files changed, 5 insertions(+), 22 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 73845b7..223b92e 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = {
};
/* Global reference count of probes */
-static DEFINE_MUTEX(blk_probe_mutex);
static atomic_t blk_probes_ref = ATOMIC_INIT(0);
-static int blk_register_tracepoints(void);
+static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);
/*
@@ -256,10 +255,8 @@ static void blk_trace_cleanup(struct blk_trace *bt)
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
- mutex_lock(&blk_probe_mutex);
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
}
int blk_trace_remove(struct request_queue *q)
@@ -471,13 +468,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- mutex_lock(&blk_probe_mutex);
- if (atomic_add_return(1, &blk_probes_ref) == 1) {
- ret = blk_register_tracepoints();
- if (ret)
- goto probe_err;
- }
- mutex_unlock(&blk_probe_mutex);
+ if (atomic_add_return(1, &blk_probes_ref) == 1)
+ blk_register_tracepoints();
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
@@ -487,9 +479,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
}
return 0;
-probe_err:
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
err:
if (bt) {
if (bt->msg_file)
@@ -863,7 +852,7 @@ void blk_add_driver_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
-static int blk_register_tracepoints(void)
+static void blk_register_tracepoints(void)
{
int ret;
@@ -901,7 +890,6 @@ static int blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_remap(blk_add_trace_remap);
WARN_ON(ret);
- return 0;
}
static void blk_unregister_tracepoints(void)
@@ -1099,11 +1087,8 @@ static void blk_tracer_print_header(struct seq_file *m)
static void blk_tracer_start(struct trace_array *tr)
{
- mutex_lock(&blk_probe_mutex);
if (atomic_add_return(1, &blk_probes_ref) == 1)
- if (blk_register_tracepoints())
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
+ blk_register_tracepoints();
trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
}
@@ -1118,10 +1103,8 @@ static int blk_tracer_init(struct trace_array *tr)
static void blk_tracer_stop(struct trace_array *tr)
{
trace_flags |= TRACE_ITER_CONTEXT_INFO;
- mutex_lock(&blk_probe_mutex);
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
}
static void blk_tracer_reset(struct trace_array *tr)
Commit-ID: 55feae54fd7ccfcbdf198e34e05ff61d9e975ed3
Gitweb: http://git.kernel.org/tip/55feae54fd7ccfcbdf198e34e05ff61d9e975ed3
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:49:08 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 20 Mar 2009 11:20:06 +0100
blktrace: report EBUSY correctly
blk_trace_remove_queue() returns EINVAL if q->blk_trace == NULL,
but blk_trace_setup_queue() doesn't return EBUSY if
q->blk_trace != NULL.
# echo 0 > sdaX/trace/enable
# echo 0 > sdaX/trace/enable
bash: echo: write error: Invalid argument
# echo 1 > sdaX/trace/enable
# echo 1 > sdaX/trace/enable
(should return EBUSY)
Signed-off-by: Li Zefan <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Steven Rostedt <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 9 +++------
1 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 11e7c8d..14986af 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1260,12 +1260,10 @@ static int blk_trace_remove_queue(struct request_queue *q)
static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
{
struct blk_trace *old_bt, *bt = NULL;
- int ret;
- ret = -ENOMEM;
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
- goto err;
+ return -ENOMEM;
bt->dev = dev;
bt->act_mask = (u16)-1;
@@ -1276,11 +1274,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
if (old_bt != NULL) {
(void)xchg(&q->blk_trace, old_bt);
kfree(bt);
- ret = -EBUSY;
+ return -EBUSY;
}
+
return 0;
-err:
- return ret;
}
/*
Commit-ID: e73fe657c9a1edba1611b4642b2d226634c1a652
Gitweb: http://git.kernel.org/tip/e73fe657c9a1edba1611b4642b2d226634c1a652
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:48:03 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 20 Mar 2009 11:20:04 +0100
blktrace: make blk_tracer_enabled a bool flag
It doesn't have to be a counter, and it can be a bool flag instead.
Signed-off-by: Li Zefan <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Steven Rostedt <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 12 +++---------
1 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fb3bc53..73845b7 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -30,7 +30,7 @@
static unsigned int blktrace_seq __read_mostly = 1;
static struct trace_array *blk_tr;
-static int __read_mostly blk_tracer_enabled;
+static bool blk_tracer_enabled __read_mostly;
/* Select an alternative, minimalistic output than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -1111,9 +1111,7 @@ static int blk_tracer_init(struct trace_array *tr)
{
blk_tr = tr;
blk_tracer_start(tr);
- mutex_lock(&blk_probe_mutex);
- blk_tracer_enabled++;
- mutex_unlock(&blk_probe_mutex);
+ blk_tracer_enabled = true;
return 0;
}
@@ -1131,11 +1129,7 @@ static void blk_tracer_reset(struct trace_array *tr)
if (!atomic_read(&blk_probes_ref))
return;
- mutex_lock(&blk_probe_mutex);
- blk_tracer_enabled--;
- WARN_ON(blk_tracer_enabled < 0);
- mutex_unlock(&blk_probe_mutex);
-
+ blk_tracer_enabled = false;
blk_tracer_stop(tr);
}
Commit-ID: 9626d7ee4addbf97245c129e24ba9a9b765bd5ef
Gitweb: http://git.kernel.org/tip/9626d7ee4addbf97245c129e24ba9a9b765bd5ef
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 11:33:55 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 20 Mar 2009 11:20:06 +0100
blktrace: remove sysfs_blk_trace_enable_show/store()
sysfs_blk_trace_enable_show()/store() share most of code with
sysfs_blk_trace_attr_show()/store().
Signed-off-by: Li Zefan <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Steven Rostedt <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 92 +++++++++++-----------------------------------
1 files changed, 22 insertions(+), 70 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 14986af..dfee6f9 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1284,72 +1284,6 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
* sysfs interface to enable and configure tracing
*/
-static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct block_device *bdev;
- ssize_t ret = -ENXIO;
-
- lock_kernel();
- bdev = bdget(part_devt(p));
- if (bdev != NULL) {
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (q != NULL) {
- mutex_lock(&bdev->bd_mutex);
- ret = sprintf(buf, "%u\n", !!q->blk_trace);
- mutex_unlock(&bdev->bd_mutex);
- }
-
- bdput(bdev);
- }
-
- unlock_kernel();
- return ret;
-}
-
-static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct block_device *bdev;
- struct request_queue *q;
- struct hd_struct *p;
- int value;
- ssize_t ret = -ENXIO;
-
- if (count == 0 || sscanf(buf, "%d", &value) != 1)
- goto out;
-
- lock_kernel();
- p = dev_to_part(dev);
- bdev = bdget(part_devt(p));
- if (bdev == NULL)
- goto out_unlock_kernel;
-
- q = bdev_get_queue(bdev);
- if (q == NULL)
- goto out_bdput;
-
- mutex_lock(&bdev->bd_mutex);
- if (value)
- ret = blk_trace_setup_queue(q, bdev->bd_dev);
- else
- ret = blk_trace_remove_queue(q);
- mutex_unlock(&bdev->bd_mutex);
-
- if (ret == 0)
- ret = count;
-out_bdput:
- bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
-out:
- return ret;
-}
-
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf);
@@ -1361,8 +1295,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
sysfs_blk_trace_attr_show, \
sysfs_blk_trace_attr_store)
-static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
- sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
+static BLK_TRACE_DEVICE_ATTR(enable);
static BLK_TRACE_DEVICE_ATTR(act_mask);
static BLK_TRACE_DEVICE_ATTR(pid);
static BLK_TRACE_DEVICE_ATTR(start_lba);
@@ -1447,6 +1380,12 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (q == NULL)
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ goto out_unlock_bdev;
+ }
+
if (q->blk_trace == NULL)
ret = sprintf(buf, "disabled\n");
else if (attr == &dev_attr_act_mask)
@@ -1457,6 +1396,8 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
else if (attr == &dev_attr_end_lba)
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
@@ -1499,6 +1440,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ if (value)
+ ret = blk_trace_setup_queue(q, bdev->bd_dev);
+ else
+ ret = blk_trace_remove_queue(q);
+ goto out_unlock_bdev;
+ }
+
ret = 0;
if (q->blk_trace == NULL)
ret = blk_trace_setup_queue(q, bdev->bd_dev);
@@ -1512,13 +1462,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
q->blk_trace->start_lba = value;
else if (attr == &dev_attr_end_lba)
q->blk_trace->end_lba = value;
- ret = count;
}
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
out_unlock_kernel:
unlock_kernel();
out:
- return ret;
+ return ret ? ret : count;
}
+
Commit-ID: afaebb6720cc62df1b03e36c2f699b00cfee784e
Gitweb: http://git.kernel.org/tip/afaebb6720cc62df1b03e36c2f699b00cfee784e
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 10:34:00 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 20 Mar 2009 11:20:07 +0100
blktrace: avoid accessing NULL bdev->bd_disk
bdev->bd_disk can be NULL, if the block device is not opened.
Try this against an unmounted partition, and you'll see NULL dereference:
# echo 1 > /sys/block/sda/sda5/enable
Signed-off-by: Li Zefan <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Steven Rostedt <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 13 +++++++++++--
1 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index dfee6f9..108f4f7 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1362,6 +1362,14 @@ static int blk_str2act_mask(const char *str)
return mask;
}
+static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
+{
+ if (bdev->bd_disk == NULL)
+ return NULL;
+
+ return bdev_get_queue(bdev);
+}
+
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1376,9 +1384,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
+
mutex_lock(&bdev->bd_mutex);
if (attr == &dev_attr_enable) {
@@ -1435,7 +1444,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
Commit-ID: 64ddedc03c9ecbe97b0700e85c5f11d2f5a4565c
Gitweb: http://git.kernel.org/tip/64ddedc03c9ecbe97b0700e85c5f11d2f5a4565c
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:48:47 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Fri, 20 Mar 2009 11:20:05 +0100
blktrace: don't increase blk_probes_ref if failed to setup blk trace
do_blk_trace_setup() may return EBUSY, but the current code
doesn't decrease blk_probes_ref in this case.
Signed-off-by: Li Zefan <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Steven Rostedt <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 223b92e..11e7c8d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -468,9 +468,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- if (atomic_add_return(1, &blk_probes_ref) == 1)
- blk_register_tracepoints();
-
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
if (old_bt) {
@@ -478,6 +475,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
goto err;
}
+ if (atomic_add_return(1, &blk_probes_ref) == 1)
+ blk_register_tracepoints();
+
return 0;
err:
if (bt) {
* Ingo Molnar <[email protected]> wrote:
>
> * Li Zefan <[email protected]> wrote:
>
> > While trying to use blktrace in -tip tree, I encounted kernel NULL
> > dereference, so I looked into the code, and then found some other
> > bugs.
> >
> > This patchset is part 1, and I have some other pending fixes.
> >
> > Each patch is small and straightforward, and should be easy to review:
> >
> > [PATCH 1/7] blktrace: fix possible memory leak
> > [PATCH 2/7] blktrace: make blk_tracer_enabled a bool flag
> > [PATCH 3/7] blktrace: remove blk_probe_mutex
> > [PATCH 4/7] blktrace: don't increase blk_probes_ref if failed to setup blk trace
> > [PATCH 5/7] blktrace: report EBUSY correctly
> > [PATCH 6/7] blktrace: remove sysfs_blk_trace_enable_show/store()
> > [PATCH 7/7] blktrace: avoid accessing NULL bdev->bd_disk
> > ---
> > blktrace.c | 154 +++++++++++++++++--------------------------------------------
> > 1 file changed, 45 insertions(+), 109 deletions(-)
> >
> > Signed-off-by: Li Zefan <[email protected]>
>
> Nice fixes. Applied to tip:tracing/blktrace, thanks!
Which would go upstream in the .30 merge window (unless Jens or
Steve objects of course).
Ingo
Em Fri, Mar 20, 2009 at 09:47:30AM +0800, Li Zefan escreveu:
> When we failed to create "block" debugfs dir, we should do come cleanups.
>
> Signed-off-by: Li Zefan <[email protected]>
> ---
> kernel/trace/blktrace.c | 2 +-
> 1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index b171778..fb3bc53 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -432,7 +432,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
> if (!blk_tree_root) {
> blk_tree_root = debugfs_create_dir("block", NULL);
> if (!blk_tree_root)
> - return -ENOMEM;
> + goto err;
> }
>
> dir = debugfs_create_dir(buts->name, blk_tree_root);
Nice catch,
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
Em Fri, Mar 20, 2009 at 09:48:03AM +0800, Li Zefan escreveu:
> It doesn't have to be a counter, and it can be a bool flag instead.
>
> Signed-off-by: Li Zefan <[email protected]>
> ---
> kernel/trace/blktrace.c | 12 +++---------
> 1 files changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
> index fb3bc53..73845b7 100644
> --- a/kernel/trace/blktrace.c
> +++ b/kernel/trace/blktrace.c
> @@ -30,7 +30,7 @@
> static unsigned int blktrace_seq __read_mostly = 1;
>
> static struct trace_array *blk_tr;
> -static int __read_mostly blk_tracer_enabled;
> +static bool blk_tracer_enabled __read_mostly;
>
> /* Select an alternative, minimalistic output than the original one */
> #define TRACE_BLK_OPT_CLASSIC 0x1
> @@ -1111,9 +1111,7 @@ static int blk_tracer_init(struct trace_array *tr)
> {
> blk_tr = tr;
> blk_tracer_start(tr);
> - mutex_lock(&blk_probe_mutex);
> - blk_tracer_enabled++;
> - mutex_unlock(&blk_probe_mutex);
> + blk_tracer_enabled = true;
> return 0;
> }
>
> @@ -1131,11 +1129,7 @@ static void blk_tracer_reset(struct trace_array *tr)
> if (!atomic_read(&blk_probes_ref))
> return;
>
> - mutex_lock(&blk_probe_mutex);
> - blk_tracer_enabled--;
> - WARN_ON(blk_tracer_enabled < 0);
> - mutex_unlock(&blk_probe_mutex);
> -
> + blk_tracer_enabled = false;
> blk_tracer_stop(tr);
> }
Copy'n'pasted code, some other tracer still have this style:
enable_branch_tracing, enable_mmiotrace,
tracing_start_sched_switch_record
Anyway, thanks,
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
- Arnaldo
Em Fri, Mar 20, 2009 at 09:48:26AM +0800, Li Zefan escreveu:
> blk_register_tracepoints() always returns 0, so make it return void, thus
> we don't need to use blk_probe_mutex to protect blk_probes_ref.
>
> Signed-off-by: Li Zefan <[email protected]>
Historic reasons, when I first worked on this I found all those WARN_ONs
in blk_register_tracepoints a bogosity that should be later fixed, but
to reduce the patch size I never got around to submit the proper patch,
so I don't think that the fix is what you suggests, here it is what I
had before I removed the non-essential parts of the patch:
+static void blk_tracer_error(const char *name)
+{
+ pr_info("blk trace: Couldn't activate tracepoint "
+ "probe to block_%s\n", name);
+}
+
+#define register_trace_block(tpoint) \
+ ret = register_trace_block_##tpoint(blk_add_trace_##tpoint); \
+ if (ret) { \
+ blk_tracer_error(#tpoint); \
+ goto *exit_point; \
+ } else \
+ exit_point = &&fail_deprobe_##tpoint;
+
+
+#define fail_trace_block(tpoint) \
+ fail_deprobe_##tpoint: \
+ unregister_trace_block_##tpoint(blk_add_trace_##tpoint)
+
+static int tracing_blk_register(void)
+{
+ int ret;
+ void *exit_point = &&error;
+
+ register_trace_block(rq_abort);
+ register_trace_block(rq_insert);
+ register_trace_block(rq_issue);
+ register_trace_block(rq_requeue);
+ register_trace_block(rq_complete);
+ register_trace_block(bio_bounce);
+ register_trace_block(bio_backmerge);
+ register_trace_block(bio_frontmerge);
+ register_trace_block(bio_queue);
+ register_trace_block(getrq);
+ register_trace_block(sleeprq);
+ register_trace_block(plug);
+ register_trace_block(unplug_timer);
+ register_trace_block(unplug_io);
+ register_trace_block(split);
+ register_trace_block(remap);
+
+ return 0;
+
+ fail_trace_block(remap);
+ fail_trace_block(split);
+ fail_trace_block(unplug_io);
+ fail_trace_block(unplug_timer);
+ fail_trace_block(plug);
+ fail_trace_block(sleeprq);
+ fail_trace_block(getrq);
+ fail_trace_block(bio_queue);
+ fail_trace_block(bio_frontmerge);
+ fail_trace_block(bio_backmerge);
+ fail_trace_block(bio_bounce);
+ fail_trace_block(rq_complete);
+ fail_trace_block(rq_requeue);
+ fail_trace_block(rq_issue);
+ fail_trace_block(rq_insert);
+ fail_trace_block(rq_abort);
+error:
+ return ret;
+}
+
+static void tracing_blk_unregister(void)
+{
+ unregister_trace_block_remap(blk_add_trace_remap);
+ unregister_trace_block_split(blk_add_trace_split);
+ unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
+ unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
+ unregister_trace_block_plug(blk_add_trace_plug);
+ unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
+ unregister_trace_block_getrq(blk_add_trace_getrq);
+ unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
+ unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
+ unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
+ unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
+ unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
+ unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
+ unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
+ unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
+ unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
+ unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
+ tracepoint_synchronize_unregister();
+}
Regards,
- Arnaldo
Em Fri, Mar 20, 2009 at 09:48:47AM +0800, Li Zefan escreveu:
> do_blk_trace_setup() may return EBUSY, but the current code doesn't
> decrease blk_probes_ref in this case.
>
> Signed-off-by: Li Zefan <[email protected]>
Looks valid,
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
- Arnaldo
Em Fri, Mar 20, 2009 at 09:49:28AM +0800, Li Zefan escreveu:
> sysfs_blk_trace_enable_show()/store() share most of code with
> sysfs_blk_trace_attr_show()/store().
>
> Signed-off-by: Li Zefan <[email protected]>
Nice consolidation, thanks!
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
- Arnaldo
Em Fri, Mar 20, 2009 at 09:49:48AM +0800, Li Zefan escreveu:
> bdev->bd_disk can be NULL, if the block device is not opened.
>
> Try this against an unmounted partition, and you'll see NULL dereference:
> # echo 1 > /sys/block/sda/sda5/enable
>
> Signed-off-by: Li Zefan <[email protected]>
I guess this can be discounted on my block layer newbeeness 8)
But as an extra check in non hot-paths are usually a good thing...
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
- Arnaldo
Em Fri, Mar 20, 2009 at 09:49:08AM +0800, Li Zefan escreveu:
> blk_trace_remove_queue() returns EINVAL if q->blk_trace == NULL,
> but blk_trace_setup_queue() doesn't return EBUSY if q->blk_trace != NULL.
>
> # echo 0 > sdaX/trace/enable
> # echo 0 > sdaX/trace/enable
> bash: echo: write error: Invalid argument
> # echo 1 > sdaX/trace/enable
> # echo 1 > sdaX/trace/enable
> (should return EBUSY)
>
> Signed-off-by: Li Zefan <[email protected]>
Thanks for all the testing, its being really appreciated,
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
- Arnaldo
* Arnaldo Carvalho de Melo <[email protected]> wrote:
> Em Fri, Mar 20, 2009 at 09:49:08AM +0800, Li Zefan escreveu:
> > blk_trace_remove_queue() returns EINVAL if q->blk_trace == NULL,
> > but blk_trace_setup_queue() doesn't return EBUSY if q->blk_trace != NULL.
> >
> > # echo 0 > sdaX/trace/enable
> > # echo 0 > sdaX/trace/enable
> > bash: echo: write error: Invalid argument
> > # echo 1 > sdaX/trace/enable
> > # echo 1 > sdaX/trace/enable
> > (should return EBUSY)
> >
> > Signed-off-by: Li Zefan <[email protected]>
>
> Thanks for all the testing, its being really appreciated,
>
> Acked-by: Arnaldo Carvalho de Melo <[email protected]>
thanks Arnaldo - i've added your acks to tip:tracing/blktrace.
Jens, are the changes fine with you too, can i merge them into
tracing/core, for v2.6.30?
Ingo
Commit-ID: 1a17662ea033674a58bad3603531b0b5d42572f6
Gitweb: http://git.kernel.org/tip/1a17662ea033674a58bad3603531b0b5d42572f6
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:47:30 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 21 Mar 2009 16:15:47 +0100
blktrace: fix possible memory leak
When we failed to create "block" debugfs dir, we should do some
cleanups.
Signed-off-by: Li Zefan <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b171778..fb3bc53 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -432,7 +432,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!blk_tree_root) {
blk_tree_root = debugfs_create_dir("block", NULL);
if (!blk_tree_root)
- return -ENOMEM;
+ goto err;
}
dir = debugfs_create_dir(buts->name, blk_tree_root);
Commit-ID: cbe28296eb1ac441b35cf45804d0ae808add7dd1
Gitweb: http://git.kernel.org/tip/cbe28296eb1ac441b35cf45804d0ae808add7dd1
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:48:47 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 21 Mar 2009 16:16:37 +0100
blktrace: don't increase blk_probes_ref if failed to setup blk trace
do_blk_trace_setup() may return EBUSY, but the current code
doesn't decrease blk_probes_ref in this case.
Signed-off-by: Li Zefan <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 223b92e..11e7c8d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -468,9 +468,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- if (atomic_add_return(1, &blk_probes_ref) == 1)
- blk_register_tracepoints();
-
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
if (old_bt) {
@@ -478,6 +475,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
goto err;
}
+ if (atomic_add_return(1, &blk_probes_ref) == 1)
+ blk_register_tracepoints();
+
return 0;
err:
if (bt) {
Commit-ID: 5006ea73f38caef6065d1136808413813271633f
Gitweb: http://git.kernel.org/tip/5006ea73f38caef6065d1136808413813271633f
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:48:03 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 21 Mar 2009 16:16:13 +0100
blktrace: make blk_tracer_enabled a bool flag
It doesn't have to be a counter, and it can be a bool flag instead.
Signed-off-by: Li Zefan <[email protected]>
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Steven Rostedt <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 12 +++---------
1 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fb3bc53..73845b7 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -30,7 +30,7 @@
static unsigned int blktrace_seq __read_mostly = 1;
static struct trace_array *blk_tr;
-static int __read_mostly blk_tracer_enabled;
+static bool blk_tracer_enabled __read_mostly;
/* Select an alternative, minimalistic output than the original one */
#define TRACE_BLK_OPT_CLASSIC 0x1
@@ -1111,9 +1111,7 @@ static int blk_tracer_init(struct trace_array *tr)
{
blk_tr = tr;
blk_tracer_start(tr);
- mutex_lock(&blk_probe_mutex);
- blk_tracer_enabled++;
- mutex_unlock(&blk_probe_mutex);
+ blk_tracer_enabled = true;
return 0;
}
@@ -1131,11 +1129,7 @@ static void blk_tracer_reset(struct trace_array *tr)
if (!atomic_read(&blk_probes_ref))
return;
- mutex_lock(&blk_probe_mutex);
- blk_tracer_enabled--;
- WARN_ON(blk_tracer_enabled < 0);
- mutex_unlock(&blk_probe_mutex);
-
+ blk_tracer_enabled = false;
blk_tracer_stop(tr);
}
Commit-ID: 3c289ba7c320560ee74979a8895141c829046a2d
Gitweb: http://git.kernel.org/tip/3c289ba7c320560ee74979a8895141c829046a2d
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:48:26 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 21 Mar 2009 16:16:25 +0100
blktrace: remove blk_probe_mutex
blk_register_tracepoints() always returns 0, so make it return void,
thus we don't need to use blk_probe_mutex to protect blk_probes_ref.
Signed-off-by: Li Zefan <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 27 +++++----------------------
1 files changed, 5 insertions(+), 22 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 73845b7..223b92e 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -47,10 +47,9 @@ static struct tracer_flags blk_tracer_flags = {
};
/* Global reference count of probes */
-static DEFINE_MUTEX(blk_probe_mutex);
static atomic_t blk_probes_ref = ATOMIC_INIT(0);
-static int blk_register_tracepoints(void);
+static void blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);
/*
@@ -256,10 +255,8 @@ static void blk_trace_cleanup(struct blk_trace *bt)
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
- mutex_lock(&blk_probe_mutex);
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
}
int blk_trace_remove(struct request_queue *q)
@@ -471,13 +468,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- mutex_lock(&blk_probe_mutex);
- if (atomic_add_return(1, &blk_probes_ref) == 1) {
- ret = blk_register_tracepoints();
- if (ret)
- goto probe_err;
- }
- mutex_unlock(&blk_probe_mutex);
+ if (atomic_add_return(1, &blk_probes_ref) == 1)
+ blk_register_tracepoints();
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
@@ -487,9 +479,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
}
return 0;
-probe_err:
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
err:
if (bt) {
if (bt->msg_file)
@@ -863,7 +852,7 @@ void blk_add_driver_data(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_add_driver_data);
-static int blk_register_tracepoints(void)
+static void blk_register_tracepoints(void)
{
int ret;
@@ -901,7 +890,6 @@ static int blk_register_tracepoints(void)
WARN_ON(ret);
ret = register_trace_block_remap(blk_add_trace_remap);
WARN_ON(ret);
- return 0;
}
static void blk_unregister_tracepoints(void)
@@ -1099,11 +1087,8 @@ static void blk_tracer_print_header(struct seq_file *m)
static void blk_tracer_start(struct trace_array *tr)
{
- mutex_lock(&blk_probe_mutex);
if (atomic_add_return(1, &blk_probes_ref) == 1)
- if (blk_register_tracepoints())
- atomic_dec(&blk_probes_ref);
- mutex_unlock(&blk_probe_mutex);
+ blk_register_tracepoints();
trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
}
@@ -1118,10 +1103,8 @@ static int blk_tracer_init(struct trace_array *tr)
static void blk_tracer_stop(struct trace_array *tr)
{
trace_flags |= TRACE_ITER_CONTEXT_INFO;
- mutex_lock(&blk_probe_mutex);
if (atomic_dec_and_test(&blk_probes_ref))
blk_unregister_tracepoints();
- mutex_unlock(&blk_probe_mutex);
}
static void blk_tracer_reset(struct trace_array *tr)
Commit-ID: 15152e448b693fa41de40f1e40ffbe717a3aab88
Gitweb: http://git.kernel.org/tip/15152e448b693fa41de40f1e40ffbe717a3aab88
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 09:49:08 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 21 Mar 2009 16:16:54 +0100
blktrace: report EBUSY correctly
blk_trace_remove_queue() returns EINVAL if q->blk_trace == NULL,
but blk_trace_setup_queue() doesn't return EBUSY if
q->blk_trace != NULL.
# echo 0 > sdaX/trace/enable
# echo 0 > sdaX/trace/enable
bash: echo: write error: Invalid argument
# echo 1 > sdaX/trace/enable
# echo 1 > sdaX/trace/enable
(should return EBUSY)
Signed-off-by: Li Zefan <[email protected]>
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 9 +++------
1 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 11e7c8d..14986af 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1260,12 +1260,10 @@ static int blk_trace_remove_queue(struct request_queue *q)
static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
{
struct blk_trace *old_bt, *bt = NULL;
- int ret;
- ret = -ENOMEM;
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
- goto err;
+ return -ENOMEM;
bt->dev = dev;
bt->act_mask = (u16)-1;
@@ -1276,11 +1274,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
if (old_bt != NULL) {
(void)xchg(&q->blk_trace, old_bt);
kfree(bt);
- ret = -EBUSY;
+ return -EBUSY;
}
+
return 0;
-err:
- return ret;
}
/*
Commit-ID: b125130b22d67f249beba10b71a254558b5279d0
Gitweb: http://git.kernel.org/tip/b125130b22d67f249beba10b71a254558b5279d0
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 10:34:00 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 21 Mar 2009 16:17:24 +0100
blktrace: avoid accessing NULL bdev->bd_disk
bdev->bd_disk can be NULL, if the block device is not opened.
Try this against an unmounted partition, and you'll see NULL dereference:
# echo 1 > /sys/block/sda/sda5/enable
Signed-off-by: Li Zefan <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 13 +++++++++++--
1 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index dfee6f9..108f4f7 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1362,6 +1362,14 @@ static int blk_str2act_mask(const char *str)
return mask;
}
+static struct request_queue *blk_trace_get_queue(struct block_device *bdev)
+{
+ if (bdev->bd_disk == NULL)
+ return NULL;
+
+ return bdev_get_queue(bdev);
+}
+
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -1376,9 +1384,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
+
mutex_lock(&bdev->bd_mutex);
if (attr == &dev_attr_enable) {
@@ -1435,7 +1444,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (bdev == NULL)
goto out_unlock_kernel;
- q = bdev_get_queue(bdev);
+ q = blk_trace_get_queue(bdev);
if (q == NULL)
goto out_bdput;
Commit-ID: cd649b8bb830d65c57c3c8b98d57b5402256d8bd
Gitweb: http://git.kernel.org/tip/cd649b8bb830d65c57c3c8b98d57b5402256d8bd
Author: Li Zefan <[email protected]>
AuthorDate: Fri, 20 Mar 2009 11:33:55 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 21 Mar 2009 16:17:08 +0100
blktrace: remove sysfs_blk_trace_enable_show/store()
sysfs_blk_trace_enable_show()/store() share most of code with
sysfs_blk_trace_attr_show()/store().
Signed-off-by: Li Zefan <[email protected]>
Acked-by: Frederic Weisbecker <[email protected]>
Acked-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Steven Rostedt <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/trace/blktrace.c | 92 +++++++++++-----------------------------------
1 files changed, 22 insertions(+), 70 deletions(-)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 14986af..dfee6f9 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1284,72 +1284,6 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
* sysfs interface to enable and configure tracing
*/
-static ssize_t sysfs_blk_trace_enable_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct hd_struct *p = dev_to_part(dev);
- struct block_device *bdev;
- ssize_t ret = -ENXIO;
-
- lock_kernel();
- bdev = bdget(part_devt(p));
- if (bdev != NULL) {
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (q != NULL) {
- mutex_lock(&bdev->bd_mutex);
- ret = sprintf(buf, "%u\n", !!q->blk_trace);
- mutex_unlock(&bdev->bd_mutex);
- }
-
- bdput(bdev);
- }
-
- unlock_kernel();
- return ret;
-}
-
-static ssize_t sysfs_blk_trace_enable_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct block_device *bdev;
- struct request_queue *q;
- struct hd_struct *p;
- int value;
- ssize_t ret = -ENXIO;
-
- if (count == 0 || sscanf(buf, "%d", &value) != 1)
- goto out;
-
- lock_kernel();
- p = dev_to_part(dev);
- bdev = bdget(part_devt(p));
- if (bdev == NULL)
- goto out_unlock_kernel;
-
- q = bdev_get_queue(bdev);
- if (q == NULL)
- goto out_bdput;
-
- mutex_lock(&bdev->bd_mutex);
- if (value)
- ret = blk_trace_setup_queue(q, bdev->bd_dev);
- else
- ret = blk_trace_remove_queue(q);
- mutex_unlock(&bdev->bd_mutex);
-
- if (ret == 0)
- ret = count;
-out_bdput:
- bdput(bdev);
-out_unlock_kernel:
- unlock_kernel();
-out:
- return ret;
-}
-
static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf);
@@ -1361,8 +1295,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
sysfs_blk_trace_attr_show, \
sysfs_blk_trace_attr_store)
-static DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
- sysfs_blk_trace_enable_show, sysfs_blk_trace_enable_store);
+static BLK_TRACE_DEVICE_ATTR(enable);
static BLK_TRACE_DEVICE_ATTR(act_mask);
static BLK_TRACE_DEVICE_ATTR(pid);
static BLK_TRACE_DEVICE_ATTR(start_lba);
@@ -1447,6 +1380,12 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (q == NULL)
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ ret = sprintf(buf, "%u\n", !!q->blk_trace);
+ goto out_unlock_bdev;
+ }
+
if (q->blk_trace == NULL)
ret = sprintf(buf, "disabled\n");
else if (attr == &dev_attr_act_mask)
@@ -1457,6 +1396,8 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
else if (attr == &dev_attr_end_lba)
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
@@ -1499,6 +1440,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
+
+ if (attr == &dev_attr_enable) {
+ if (value)
+ ret = blk_trace_setup_queue(q, bdev->bd_dev);
+ else
+ ret = blk_trace_remove_queue(q);
+ goto out_unlock_bdev;
+ }
+
ret = 0;
if (q->blk_trace == NULL)
ret = blk_trace_setup_queue(q, bdev->bd_dev);
@@ -1512,13 +1462,15 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
q->blk_trace->start_lba = value;
else if (attr == &dev_attr_end_lba)
q->blk_trace->end_lba = value;
- ret = count;
}
+
+out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
out_bdput:
bdput(bdev);
out_unlock_kernel:
unlock_kernel();
out:
- return ret;
+ return ret ? ret : count;
}
+
Arnaldo Carvalho de Melo wrote:
> Em Fri, Mar 20, 2009 at 09:48:26AM +0800, Li Zefan escreveu:
>> blk_register_tracepoints() always returns 0, so make it return void, thus
>> we don't need to use blk_probe_mutex to protect blk_probes_ref.
>>
>> Signed-off-by: Li Zefan <[email protected]>
>
> Historic reasons, when I first worked on this I found all those WARN_ONs
> in blk_register_tracepoints a bogosity that should be later fixed, but
> to reduce the patch size I never got around to submit the proper patch,
> so I don't think that the fix is what you suggests, here it is what I
> had before I removed the non-essential parts of the patch:
>
I though register_trace_xxx() will fail only when there is a name collision,
in this case WARN_ON() and return 0 should be reasonable. But I just dig into
the code and found it may allocate memory and thus might return ENOMEM, so
I guess it's better to check the return value, though it will hardly happen
in real-life..
> +static void blk_tracer_error(const char *name)
> +{
> + pr_info("blk trace: Couldn't activate tracepoint "
> + "probe to block_%s\n", name);
> +}
> +
> +#define register_trace_block(tpoint) \
> + ret = register_trace_block_##tpoint(blk_add_trace_##tpoint); \
> + if (ret) { \
> + blk_tracer_error(#tpoint); \
> + goto *exit_point; \
> + } else \
> + exit_point = &&fail_deprobe_##tpoint;
> +
> +
> +#define fail_trace_block(tpoint) \
> + fail_deprobe_##tpoint: \
> + unregister_trace_block_##tpoint(blk_add_trace_##tpoint)
> +
> +static int tracing_blk_register(void)
> +{
> + int ret;
> + void *exit_point = &&error;
> +
> + register_trace_block(rq_abort);
> + register_trace_block(rq_insert);
> + register_trace_block(rq_issue);
> + register_trace_block(rq_requeue);
> + register_trace_block(rq_complete);
> + register_trace_block(bio_bounce);
> + register_trace_block(bio_backmerge);
> + register_trace_block(bio_frontmerge);
> + register_trace_block(bio_queue);
> + register_trace_block(getrq);
> + register_trace_block(sleeprq);
> + register_trace_block(plug);
> + register_trace_block(unplug_timer);
> + register_trace_block(unplug_io);
> + register_trace_block(split);
> + register_trace_block(remap);
> +
> + return 0;
> +
> + fail_trace_block(remap);
> + fail_trace_block(split);
> + fail_trace_block(unplug_io);
> + fail_trace_block(unplug_timer);
> + fail_trace_block(plug);
> + fail_trace_block(sleeprq);
> + fail_trace_block(getrq);
> + fail_trace_block(bio_queue);
> + fail_trace_block(bio_frontmerge);
> + fail_trace_block(bio_backmerge);
> + fail_trace_block(bio_bounce);
> + fail_trace_block(rq_complete);
> + fail_trace_block(rq_requeue);
> + fail_trace_block(rq_issue);
> + fail_trace_block(rq_insert);
> + fail_trace_block(rq_abort);
> +error:
> + return ret;
> +}
> +
> +static void tracing_blk_unregister(void)
> +{
> + unregister_trace_block_remap(blk_add_trace_remap);
> + unregister_trace_block_split(blk_add_trace_split);
> + unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
> + unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
> + unregister_trace_block_plug(blk_add_trace_plug);
> + unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
> + unregister_trace_block_getrq(blk_add_trace_getrq);
> + unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
> + unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
> + unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
> + unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
> + unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
> + unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
> + unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
> + unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
> + unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
> + unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
> + tracepoint_synchronize_unregister();
> +}
>
> Regards,
>
> - Arnaldo
>
>
Em Sun, Mar 22, 2009 at 02:04:14PM +0800, Li Zefan escreveu:
> Arnaldo Carvalho de Melo wrote:
> > Em Fri, Mar 20, 2009 at 09:48:26AM +0800, Li Zefan escreveu:
> >> blk_register_tracepoints() always returns 0, so make it return void, thus
> >> we don't need to use blk_probe_mutex to protect blk_probes_ref.
> >>
> >> Signed-off-by: Li Zefan <[email protected]>
> >
> > Historic reasons, when I first worked on this I found all those WARN_ONs
> > in blk_register_tracepoints a bogosity that should be later fixed, but
> > to reduce the patch size I never got around to submit the proper patch,
> > so I don't think that the fix is what you suggests, here it is what I
> > had before I removed the non-essential parts of the patch:
> >
>
> I though register_trace_xxx() will fail only when there is a name collision,
> in this case WARN_ON() and return 0 should be reasonable. But I just dig into
> the code and found it may allocate memory and thus might return ENOMEM, so
> I guess it's better to check the return value, though it will hardly happen
> in real-life..
Well, I disagree with that, if it can fail, it will fail someday, if
failing is such a disastrous thing, register_trace_xxx() should return
void and do the WARN_ON directly :-)
- Arnaldo
> > +static void blk_tracer_error(const char *name)
> > +{
> > + pr_info("blk trace: Couldn't activate tracepoint "
> > + "probe to block_%s\n", name);
> > +}
> > +
> > +#define register_trace_block(tpoint) \
> > + ret = register_trace_block_##tpoint(blk_add_trace_##tpoint); \
> > + if (ret) { \
> > + blk_tracer_error(#tpoint); \
> > + goto *exit_point; \
> > + } else \
> > + exit_point = &&fail_deprobe_##tpoint;
> > +
> > +
> > +#define fail_trace_block(tpoint) \
> > + fail_deprobe_##tpoint: \
> > + unregister_trace_block_##tpoint(blk_add_trace_##tpoint)
> > +
> > +static int tracing_blk_register(void)
> > +{
> > + int ret;
> > + void *exit_point = &&error;
> > +
> > + register_trace_block(rq_abort);
> > + register_trace_block(rq_insert);
> > + register_trace_block(rq_issue);
> > + register_trace_block(rq_requeue);
> > + register_trace_block(rq_complete);
> > + register_trace_block(bio_bounce);
> > + register_trace_block(bio_backmerge);
> > + register_trace_block(bio_frontmerge);
> > + register_trace_block(bio_queue);
> > + register_trace_block(getrq);
> > + register_trace_block(sleeprq);
> > + register_trace_block(plug);
> > + register_trace_block(unplug_timer);
> > + register_trace_block(unplug_io);
> > + register_trace_block(split);
> > + register_trace_block(remap);
> > +
> > + return 0;
> > +
> > + fail_trace_block(remap);
> > + fail_trace_block(split);
> > + fail_trace_block(unplug_io);
> > + fail_trace_block(unplug_timer);
> > + fail_trace_block(plug);
> > + fail_trace_block(sleeprq);
> > + fail_trace_block(getrq);
> > + fail_trace_block(bio_queue);
> > + fail_trace_block(bio_frontmerge);
> > + fail_trace_block(bio_backmerge);
> > + fail_trace_block(bio_bounce);
> > + fail_trace_block(rq_complete);
> > + fail_trace_block(rq_requeue);
> > + fail_trace_block(rq_issue);
> > + fail_trace_block(rq_insert);
> > + fail_trace_block(rq_abort);
> > +error:
> > + return ret;
> > +}
> > +
> > +static void tracing_blk_unregister(void)
> > +{
> > + unregister_trace_block_remap(blk_add_trace_remap);
> > + unregister_trace_block_split(blk_add_trace_split);
> > + unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
> > + unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
> > + unregister_trace_block_plug(blk_add_trace_plug);
> > + unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
> > + unregister_trace_block_getrq(blk_add_trace_getrq);
> > + unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
> > + unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
> > + unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
> > + unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
> > + unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
> > + unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
> > + unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
> > + unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
> > + unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
> > + unregister_trace_block_rq_abort(blk_add_trace_rq_abort);
> > + tracepoint_synchronize_unregister();
> > +}
> >
> > Regards,
> >
> > - Arnaldo
> >
> >
On Fri, 20 Mar 2009, Ingo Molnar wrote:
>
> * Ingo Molnar <[email protected]> wrote:
>
> >
> > * Li Zefan <[email protected]> wrote:
> >
> > > While trying to use blktrace in -tip tree, I encounted kernel NULL
> > > dereference, so I looked into the code, and then found some other
> > > bugs.
> > >
> > > This patchset is part 1, and I have some other pending fixes.
> > >
> > > Each patch is small and straightforward, and should be easy to review:
> > >
> > > [PATCH 1/7] blktrace: fix possible memory leak
> > > [PATCH 2/7] blktrace: make blk_tracer_enabled a bool flag
> > > [PATCH 3/7] blktrace: remove blk_probe_mutex
> > > [PATCH 4/7] blktrace: don't increase blk_probes_ref if failed to setup blk trace
> > > [PATCH 5/7] blktrace: report EBUSY correctly
> > > [PATCH 6/7] blktrace: remove sysfs_blk_trace_enable_show/store()
> > > [PATCH 7/7] blktrace: avoid accessing NULL bdev->bd_disk
> > > ---
> > > blktrace.c | 154 +++++++++++++++++--------------------------------------------
> > > 1 file changed, 45 insertions(+), 109 deletions(-)
> > >
> > > Signed-off-by: Li Zefan <[email protected]>
> >
> > Nice fixes. Applied to tip:tracing/blktrace, thanks!
>
> Which would go upstream in the .30 merge window (unless Jens or
> Steve objects of course).
I didn't see anything wrong with these patches. (sorry for the late reply,
I'm still going through emails).
Acked-by: Steven Rostedt <[email protected]>
-- Steve