2013-03-18 16:51:19

by Roger Pau Monne

[permalink] [raw]
Subject: [PATCH 0/5] xen-block: cleanup and fixes

This series contains the cleanups and fixes found in my previous
indirect descriptors series, that are aimed for linux 3.9.

Available in the git repository at:
git://xenbits.xen.org/people/royger/linux.git blk-for-3.9

Roger Pau Monne (5):
xen-blkback: don't store dev_bus_addr
xen-blkback: fix foreach_grant_safe to handle empty lists
xen-blkfront: switch from llist to list
xen-blkfront: pre-allocate pages for requests
xen-blkfront: remove frame list from blk_shadow

drivers/block/xen-blkback/blkback.c | 19 +---
drivers/block/xen-blkback/common.h | 1 -
drivers/block/xen-blkfront.c | 151 +++++++++++++++++++++--------------
3 files changed, 95 insertions(+), 76 deletions(-)


2013-03-18 16:51:21

by Roger Pau Monne

[permalink] [raw]
Subject: [PATCH 1/5] xen-blkback: don't store dev_bus_addr

dev_bus_addr returned in the grant ref map operation is the mfn of the
passed page, there's no need to store it in the persistent grant
entry, since we can always get it provided that we have the page.

This reduces the memory overhead of persistent grants in blkback.

Signed-off-by: Roger Pau Monné <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: [email protected]
---
drivers/block/xen-blkback/blkback.c | 17 ++++-------------
drivers/block/xen-blkback/common.h | 1 -
2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 477a17c..d909acf0 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req,
* If this is a new persistent grant
* save the handler
*/
- persistent_gnts[i]->handle = map[j].handle;
- persistent_gnts[i]->dev_bus_addr =
- map[j++].dev_bus_addr;
+ persistent_gnts[i]->handle = map[j++].handle;
}
pending_handle(pending_req, i) =
persistent_gnts[i]->handle;

if (ret)
continue;
-
- seg[i].buf = persistent_gnts[i]->dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
} else {
- pending_handle(pending_req, i) = map[j].handle;
+ pending_handle(pending_req, i) = map[j++].handle;
bitmap_set(pending_req->unmap_seg, i, 1);

- if (ret) {
- j++;
+ if (ret)
continue;
- }
-
- seg[i].buf = map[j++].dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
}
+ seg[i].buf = (req->u.rw.seg[i].first_sect << 9);
}
return ret;
}
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index da78346..60103e2 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -187,7 +187,6 @@ struct persistent_gnt {
struct page *page;
grant_ref_t gnt;
grant_handle_t handle;
- uint64_t dev_bus_addr;
struct rb_node node;
};

--
1.7.7.5 (Apple Git-26)

2013-03-18 16:51:23

by Roger Pau Monne

[permalink] [raw]
Subject: [PATCH 2/5] xen-blkback: fix foreach_grant_safe to handle empty lists

We may use foreach_grant_safe in the future with empty lists, so make
sure we can handle them.

Signed-off-by: Roger Pau Monné <[email protected]>
Cc: [email protected]
Cc: Konrad Rzeszutek Wilk <[email protected]>
---
drivers/block/xen-blkback/blkback.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index d909acf0..7319f67 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -164,7 +164,7 @@ static void make_response(struct xen_blkif *blkif, u64 id,

#define foreach_grant_safe(pos, n, rbtree, node) \
for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \
- (n) = rb_next(&(pos)->node); \
+ (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL; \
&(pos)->node != NULL; \
(pos) = container_of(n, typeof(*(pos)), node), \
(n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
--
1.7.7.5 (Apple Git-26)

2013-03-18 16:51:48

by Roger Pau Monne

[permalink] [raw]
Subject: [PATCH 5/5] xen-blkfront: remove frame list from blk_shadow

We already have the frame (pfn of the grant page) stored inside struct
grant, so there's no need to keep an aditional list of mapped frames
for a specific request. This reduces memory usage in blkfront.

Signed-off-by: Roger Pau Monné <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: [email protected]
---
drivers/block/xen-blkfront.c | 6 +-----
1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c640433..a894f88 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -74,7 +74,6 @@ struct grant {
struct blk_shadow {
struct blkif_request req;
struct request *request;
- unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

@@ -356,7 +355,6 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
static int blkif_queue_request(struct request *req)
{
struct blkfront_info *info = req->rq_disk->private_data;
- unsigned long buffer_mfn;
struct blkif_request *ring_req;
unsigned long id;
unsigned int fsect, lsect;
@@ -434,7 +432,6 @@ static int blkif_queue_request(struct request *req)

gnt_list_entry = get_grant(&gref_head, info);
ref = gnt_list_entry->gref;
- buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);

info->shadow[id].grants_used[i] = gnt_list_entry;

@@ -465,7 +462,6 @@ static int blkif_queue_request(struct request *req)
kunmap_atomic(shared_data);
}

- info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
ring_req->u.rw.seg[i] =
(struct blkif_request_segment) {
.gref = ref,
@@ -1268,7 +1264,7 @@ static int blkif_recover(struct blkfront_info *info)
gnttab_grant_foreign_access_ref(
req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
- pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
+ pfn_to_mfn(copy[i].grants_used[j]->pfn),
0);
}
info->shadow[req->u.rw.id].req = *req;
--
1.7.7.5 (Apple Git-26)

2013-03-18 16:51:49

by Roger Pau Monne

[permalink] [raw]
Subject: [PATCH 4/5] xen-blkfront: pre-allocate pages for requests

This prevents us from having to call alloc_page while we are preparing
the request. Since blkfront was calling alloc_page with a spinlock
held we used GFP_ATOMIC, which can fail if we are requesting a lot of
pages since it is using the emergency memory pools.

Allocating all the pages at init prevents us from having to call
alloc_page, thus preventing possible failures.

Signed-off-by: Roger Pau Monné <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: [email protected]
---
Changes since RFC:
* Move page buffer initialization to setup_blkring
---
drivers/block/xen-blkfront.c | 120 +++++++++++++++++++++++++++--------------
1 files changed, 79 insertions(+), 41 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 97324cd1..c640433 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -165,6 +165,69 @@ static int add_id_to_freelist(struct blkfront_info *info,
return 0;
}

+static int fill_grant_buffer(struct blkfront_info *info, int num)
+{
+ struct page *granted_page;
+ struct grant *gnt_list_entry, *n;
+ int i = 0;
+
+ while(i < num) {
+ gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
+ if (!gnt_list_entry)
+ goto out_of_memory;
+
+ granted_page = alloc_page(GFP_NOIO);
+ if (!granted_page) {
+ kfree(gnt_list_entry);
+ goto out_of_memory;
+ }
+
+ gnt_list_entry->pfn = page_to_pfn(granted_page);
+ gnt_list_entry->gref = GRANT_INVALID_REF;
+ list_add(&gnt_list_entry->node, &info->persistent_gnts);
+ i++;
+ }
+
+ return 0;
+
+out_of_memory:
+ list_for_each_entry_safe(gnt_list_entry, n,
+ &info->persistent_gnts, node) {
+ list_del(&gnt_list_entry->node);
+ __free_page(pfn_to_page(gnt_list_entry->pfn));
+ kfree(gnt_list_entry);
+ i--;
+ }
+ BUG_ON(i != 0);
+ return -ENOMEM;
+}
+
+static struct grant *get_grant(grant_ref_t *gref_head,
+ struct blkfront_info *info)
+{
+ struct grant *gnt_list_entry;
+ unsigned long buffer_mfn;
+
+ BUG_ON(list_empty(&info->persistent_gnts));
+ gnt_list_entry = list_first_entry(&info->persistent_gnts, struct grant,
+ node);
+ list_del(&gnt_list_entry->node);
+
+ if (gnt_list_entry->gref != GRANT_INVALID_REF) {
+ info->persistent_gnts_c--;
+ return gnt_list_entry;
+ }
+
+ /* Assign a gref to this page */
+ gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
+ BUG_ON(gnt_list_entry->gref == -ENOSPC);
+ buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+ gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
+ info->xbdev->otherend_id,
+ buffer_mfn, 0);
+ return gnt_list_entry;
+}
+
static const char *op_name(int op)
{
static const char *const names[] = {
@@ -306,7 +369,6 @@ static int blkif_queue_request(struct request *req)
*/
bool new_persistent_gnts;
grant_ref_t gref_head;
- struct page *granted_page;
struct grant *gnt_list_entry = NULL;
struct scatterlist *sg;

@@ -370,42 +432,9 @@ static int blkif_queue_request(struct request *req)
fsect = sg->offset >> 9;
lsect = fsect + (sg->length >> 9) - 1;

- if (info->persistent_gnts_c) {
- BUG_ON(list_empty(&info->persistent_gnts));
- gnt_list_entry = list_first_entry(
- &info->persistent_gnts,
- struct grant, node);
- list_del(&gnt_list_entry->node);
-
- ref = gnt_list_entry->gref;
- buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
- info->persistent_gnts_c--;
- } else {
- ref = gnttab_claim_grant_reference(&gref_head);
- BUG_ON(ref == -ENOSPC);
-
- gnt_list_entry =
- kmalloc(sizeof(struct grant),
- GFP_ATOMIC);
- if (!gnt_list_entry)
- return -ENOMEM;
-
- granted_page = alloc_page(GFP_ATOMIC);
- if (!granted_page) {
- kfree(gnt_list_entry);
- return -ENOMEM;
- }
-
- gnt_list_entry->pfn =
- page_to_pfn(granted_page);
- gnt_list_entry->gref = ref;
-
- buffer_mfn = pfn_to_mfn(page_to_pfn(
- granted_page));
- gnttab_grant_foreign_access_ref(ref,
- info->xbdev->otherend_id,
- buffer_mfn, 0);
- }
+ gnt_list_entry = get_grant(&gref_head, info);
+ ref = gnt_list_entry->gref;
+ buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);

info->shadow[id].grants_used[i] = gnt_list_entry;

@@ -803,17 +832,20 @@ static void blkif_free(struct blkfront_info *info, int suspend)
blk_stop_queue(info->rq);

/* Remove all persistent grants */
- if (info->persistent_gnts_c) {
+ if (!list_empty(&info->persistent_gnts)) {
list_for_each_entry_safe(persistent_gnt, n,
&info->persistent_gnts, node) {
list_del(&persistent_gnt->node);
- gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+ if (persistent_gnt->gref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(persistent_gnt->gref,
+ 0, 0UL);
+ info->persistent_gnts_c--;
+ }
__free_page(pfn_to_page(persistent_gnt->pfn));
kfree(persistent_gnt);
- info->persistent_gnts_c--;
}
- BUG_ON(info->persistent_gnts_c != 0);
}
+ BUG_ON(info->persistent_gnts_c != 0);

/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
@@ -1008,6 +1040,12 @@ static int setup_blkring(struct xenbus_device *dev,

sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);

+ /* Allocate memory for grants */
+ err = fill_grant_buffer(info, BLK_RING_SIZE *
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ if (err)
+ goto fail;
+
err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
if (err < 0) {
free_page((unsigned long)sring);
--
1.7.7.5 (Apple Git-26)

2013-03-18 16:52:29

by Roger Pau Monne

[permalink] [raw]
Subject: [PATCH 3/5] xen-blkfront: switch from llist to list

Replace the use of llist with list.

llist_for_each_entry_safe can trigger a bug in GCC 4.1, so it's best
to remove it and use a doubly linked list, which is used extensively
in the kernel already.

Specifically this bug can be triggered by hot-unplugging a disk,
either by doing xm block-detach or by save/restore cycle.

BUG: unable to handle kernel paging request at fffffffffffffff0
IP: [<ffffffffa0047223>] blkif_free+0x63/0x130 [xen_blkfront]
The crash call trace is:
...
bad_area_nosemaphore+0x13/0x20
do_page_fault+0x25e/0x4b0
page_fault+0x25/0x30
? blkif_free+0x63/0x130 [xen_blkfront]
blkfront_resume+0x46/0xa0 [xen_blkfront]
xenbus_dev_resume+0x6c/0x140
pm_op+0x192/0x1b0
device_resume+0x82/0x1e0
dpm_resume+0xc9/0x1a0
dpm_resume_end+0x15/0x30
do_suspend+0x117/0x1e0

When drilling down to the assembler code, on newer GCC it does
.L29:
cmpq $-16, %r12 #, persistent_gnt check
je .L30 #, out of the loop
.L25:
... code in the loop
testq %r13, %r13 # n
je .L29 #, back to the top of the loop
cmpq $-16, %r12 #, persistent_gnt check
movq 16(%r12), %r13 # <variable>.node.next, n
jne .L25 #, back to the top of the loop
.L30:

While on GCC 4.1, it is:
L78:
... code in the loop
testq %r13, %r13 # n
je .L78 #, back to the top of the loop
movq 16(%rbx), %r13 # <variable>.node.next, n
jmp .L78 #, back to the top of the loop

Which basically means that the exit loop condition instead of
being:

&(pos)->member != NULL;

is:
;

which makes the loop unbound.

Since we always manipulate the list while holding the io_lock, there's
no need for additional locking (llist used previously is safe to use
concurrently without additional locking).

Should be backported to 3.8 stable.

Signed-off-by: Roger Pau Monné <[email protected]>
[Part of the description]
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
Cc: [email protected]
---
drivers/block/xen-blkfront.c | 41 ++++++++++++++++++-----------------------
1 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9620644..97324cd1 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -44,7 +44,7 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
-#include <linux/llist.h>
+#include <linux/list.h>

#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -68,7 +68,7 @@ enum blkif_state {
struct grant {
grant_ref_t gref;
unsigned long pfn;
- struct llist_node node;
+ struct list_head node;
};

struct blk_shadow {
@@ -105,7 +105,7 @@ struct blkfront_info
struct work_struct work;
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
- struct llist_head persistent_gnts;
+ struct list_head persistent_gnts;
unsigned int persistent_gnts_c;
unsigned long shadow_free;
unsigned int feature_flush;
@@ -371,10 +371,11 @@ static int blkif_queue_request(struct request *req)
lsect = fsect + (sg->length >> 9) - 1;

if (info->persistent_gnts_c) {
- BUG_ON(llist_empty(&info->persistent_gnts));
- gnt_list_entry = llist_entry(
- llist_del_first(&info->persistent_gnts),
- struct grant, node);
+ BUG_ON(list_empty(&info->persistent_gnts));
+ gnt_list_entry = list_first_entry(
+ &info->persistent_gnts,
+ struct grant, node);
+ list_del(&gnt_list_entry->node);

ref = gnt_list_entry->gref;
buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
@@ -790,9 +791,8 @@ static void blkif_restart_queue(struct work_struct *work)

static void blkif_free(struct blkfront_info *info, int suspend)
{
- struct llist_node *all_gnts;
- struct grant *persistent_gnt, *tmp;
- struct llist_node *n;
+ struct grant *persistent_gnt;
+ struct grant *n;

/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&info->io_lock);
@@ -804,20 +804,15 @@ static void blkif_free(struct blkfront_info *info, int suspend)

/* Remove all persistent grants */
if (info->persistent_gnts_c) {
- all_gnts = llist_del_all(&info->persistent_gnts);
- persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node);
- while (persistent_gnt) {
+ list_for_each_entry_safe(persistent_gnt, n,
+ &info->persistent_gnts, node) {
+ list_del(&persistent_gnt->node);
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
__free_page(pfn_to_page(persistent_gnt->pfn));
- tmp = persistent_gnt;
- n = persistent_gnt->node.next;
- if (n)
- persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node);
- else
- persistent_gnt = NULL;
- kfree(tmp);
+ kfree(persistent_gnt);
+ info->persistent_gnts_c--;
}
- info->persistent_gnts_c = 0;
+ BUG_ON(info->persistent_gnts_c != 0);
}

/* No more gnttab callback work. */
@@ -875,7 +870,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
}
/* Add the persistent grant into the list of free grants */
for (i = 0; i < s->req.u.rw.nr_segments; i++) {
- llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+ list_add(&s->grants_used[i]->node, &info->persistent_gnts);
info->persistent_gnts_c++;
}
}
@@ -1171,7 +1166,7 @@ static int blkfront_probe(struct xenbus_device *dev,
spin_lock_init(&info->io_lock);
info->xbdev = dev;
info->vdevice = vdevice;
- init_llist_head(&info->persistent_gnts);
+ INIT_LIST_HEAD(&info->persistent_gnts);
info->persistent_gnts_c = 0;
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);
--
1.7.7.5 (Apple Git-26)

2013-03-19 12:52:00

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 3/5] xen-blkfront: switch from llist to list

On Mon, Mar 18, 2013 at 05:49:34PM +0100, Roger Pau Monne wrote:
> Replace the use of llist with list.
>
> llist_for_each_entry_safe can trigger a bug in GCC 4.1, so it's best
> to remove it and use a doubly linked list, which is used extensively
> in the kernel already.
>
> Specifically this bug can be triggered by hot-unplugging a disk,
> either by doing xm block-detach or by save/restore cycle.
>
> BUG: unable to handle kernel paging request at fffffffffffffff0
> IP: [<ffffffffa0047223>] blkif_free+0x63/0x130 [xen_blkfront]
> The crash call trace is:
> ...
> bad_area_nosemaphore+0x13/0x20
> do_page_fault+0x25e/0x4b0
> page_fault+0x25/0x30
> ? blkif_free+0x63/0x130 [xen_blkfront]
> blkfront_resume+0x46/0xa0 [xen_blkfront]
> xenbus_dev_resume+0x6c/0x140
> pm_op+0x192/0x1b0
> device_resume+0x82/0x1e0
> dpm_resume+0xc9/0x1a0
> dpm_resume_end+0x15/0x30
> do_suspend+0x117/0x1e0
>
> When drilling down to the assembler code, on newer GCC it does
> .L29:
> cmpq $-16, %r12 #, persistent_gnt check
> je .L30 #, out of the loop
> .L25:
> ... code in the loop
> testq %r13, %r13 # n
> je .L29 #, back to the top of the loop
> cmpq $-16, %r12 #, persistent_gnt check
> movq 16(%r12), %r13 # <variable>.node.next, n
> jne .L25 #, back to the top of the loop
> .L30:
>
> While on GCC 4.1, it is:
> L78:
> ... code in the loop
> testq %r13, %r13 # n
> je .L78 #, back to the top of the loop
> movq 16(%rbx), %r13 # <variable>.node.next, n
> jmp .L78 #, back to the top of the loop
>
> Which basically means that the exit loop condition instead of
> being:
>
> &(pos)->member != NULL;
>
> is:
> ;
>
> which makes the loop unbound.
>
> Since we always manipulate the list while holding the io_lock, there's
> no need for additional locking (llist used previously is safe to use
> concurrently without additional locking).
>
> Should be backported to 3.8 stable.

I get
konrad@phenom:~/linux$ patch -p1 < /tmp/kk
patching file drivers/block/xen-blkfront.c
Hunk #1 FAILED at 44.
Hunk #2 FAILED at 68.
Hunk #3 FAILED at 105.
Hunk #4 FAILED at 371.
patch: **** malformed patch at line 172: ork)


and idea why? Could you resent the patch as an attachment please?

>
> Signed-off-by: Roger Pau Monn? <[email protected]>
> [Part of the description]
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> Cc: [email protected]
> ---
> drivers/block/xen-blkfront.c | 41 ++++++++++++++++++-----------------------
> 1 files changed, 18 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 9620644..97324cd1 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -44,7 +44,7 @@
> #include <linux/mutex.h>
> #include <linux/scatterlist.h>
> #include <linux/bitmap.h>
> -#include <linux/llist.h>
> +#include <linux/list.h>
>
> #include <xen/xen.h>
> #include <xen/xenbus.h>
> @@ -68,7 +68,7 @@ enum blkif_state {
> struct grant {
> grant_ref_t gref;
> unsigned long pfn;
> - struct llist_node node;
> + struct list_head node;
> };
>
> struct blk_shadow {
> @@ -105,7 +105,7 @@ struct blkfront_info
> struct work_struct work;
> struct gnttab_free_callback callback;
> struct blk_shadow shadow[BLK_RING_SIZE];
> - struct llist_head persistent_gnts;
> + struct list_head persistent_gnts;
> unsigned int persistent_gnts_c;
> unsigned long shadow_free;
> unsigned int feature_flush;
> @@ -371,10 +371,11 @@ static int blkif_queue_request(struct request *req)
> lsect = fsect + (sg->length >> 9) - 1;
>
> if (info->persistent_gnts_c) {
> - BUG_ON(llist_empty(&info->persistent_gnts));
> - gnt_list_entry = llist_entry(
> - llist_del_first(&info->persistent_gnts),
> - struct grant, node);
> + BUG_ON(list_empty(&info->persistent_gnts));
> + gnt_list_entry = list_first_entry(
> + &info->persistent_gnts,
> + struct grant, node);
> + list_del(&gnt_list_entry->node);
>
> ref = gnt_list_entry->gref;
> buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
> @@ -790,9 +791,8 @@ static void blkif_restart_queue(struct work_struct *work)
>
> static void blkif_free(struct blkfront_info *info, int suspend)
> {
> - struct llist_node *all_gnts;
> - struct grant *persistent_gnt, *tmp;
> - struct llist_node *n;
> + struct grant *persistent_gnt;
> + struct grant *n;
>
> /* Prevent new requests being issued until we fix things up. */
> spin_lock_irq(&info->io_lock);
> @@ -804,20 +804,15 @@ static void blkif_free(struct blkfront_info *info, int suspend)
>
> /* Remove all persistent grants */
> if (info->persistent_gnts_c) {
> - all_gnts = llist_del_all(&info->persistent_gnts);
> - persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node);
> - while (persistent_gnt) {
> + list_for_each_entry_safe(persistent_gnt, n,
> + &info->persistent_gnts, node) {
> + list_del(&persistent_gnt->node);
> gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
> __free_page(pfn_to_page(persistent_gnt->pfn));
> - tmp = persistent_gnt;
> - n = persistent_gnt->node.next;
> - if (n)
> - persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node);
> - else
> - persistent_gnt = NULL;
> - kfree(tmp);
> + kfree(persistent_gnt);
> + info->persistent_gnts_c--;
> }
> - info->persistent_gnts_c = 0;
> + BUG_ON(info->persistent_gnts_c != 0);
> }
>
> /* No more gnttab callback work. */
> @@ -875,7 +870,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
> }
> /* Add the persistent grant into the list of free grants */
> for (i = 0; i < s->req.u.rw.nr_segments; i++) {
> - llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
> + list_add(&s->grants_used[i]->node, &info->persistent_gnts);
> info->persistent_gnts_c++;
> }
> }
> @@ -1171,7 +1166,7 @@ static int blkfront_probe(struct xenbus_device *dev,
> spin_lock_init(&info->io_lock);
> info->xbdev = dev;
> info->vdevice = vdevice;
> - init_llist_head(&info->persistent_gnts);
> + INIT_LIST_HEAD(&info->persistent_gnts);
> info->persistent_gnts_c = 0;
> info->connected = BLKIF_STATE_DISCONNECTED;
> INIT_WORK(&info->work, blkif_restart_queue);
> --
> 1.7.7.5 (Apple Git-26)
>

2013-03-19 12:57:25

by Roger Pau Monne

[permalink] [raw]
Subject: Re: [PATCH 3/5] xen-blkfront: switch from llist to list

On 19/03/13 13:51, Konrad Rzeszutek Wilk wrote:
> On Mon, Mar 18, 2013 at 05:49:34PM +0100, Roger Pau Monne wrote:
>> Replace the use of llist with list.
>>
>> llist_for_each_entry_safe can trigger a bug in GCC 4.1, so it's best
>> to remove it and use a doubly linked list, which is used extensively
>> in the kernel already.
>>
>> Specifically this bug can be triggered by hot-unplugging a disk,
>> either by doing xm block-detach or by save/restore cycle.
>>
>> BUG: unable to handle kernel paging request at fffffffffffffff0
>> IP: [<ffffffffa0047223>] blkif_free+0x63/0x130 [xen_blkfront]
>> The crash call trace is:
>> ...
>> bad_area_nosemaphore+0x13/0x20
>> do_page_fault+0x25e/0x4b0
>> page_fault+0x25/0x30
>> ? blkif_free+0x63/0x130 [xen_blkfront]
>> blkfront_resume+0x46/0xa0 [xen_blkfront]
>> xenbus_dev_resume+0x6c/0x140
>> pm_op+0x192/0x1b0
>> device_resume+0x82/0x1e0
>> dpm_resume+0xc9/0x1a0
>> dpm_resume_end+0x15/0x30
>> do_suspend+0x117/0x1e0
>>
>> When drilling down to the assembler code, on newer GCC it does
>> .L29:
>> cmpq $-16, %r12 #, persistent_gnt check
>> je .L30 #, out of the loop
>> .L25:
>> ... code in the loop
>> testq %r13, %r13 # n
>> je .L29 #, back to the top of the loop
>> cmpq $-16, %r12 #, persistent_gnt check
>> movq 16(%r12), %r13 # <variable>.node.next, n
>> jne .L25 #, back to the top of the loop
>> .L30:
>>
>> While on GCC 4.1, it is:
>> L78:
>> ... code in the loop
>> testq %r13, %r13 # n
>> je .L78 #, back to the top of the loop
>> movq 16(%rbx), %r13 # <variable>.node.next, n
>> jmp .L78 #, back to the top of the loop
>>
>> Which basically means that the exit loop condition instead of
>> being:
>>
>> &(pos)->member != NULL;
>>
>> is:
>> ;
>>
>> which makes the loop unbound.
>>
>> Since we always manipulate the list while holding the io_lock, there's
>> no need for additional locking (llist used previously is safe to use
>> concurrently without additional locking).
>>
>> Should be backported to 3.8 stable.
>
> I get
> konrad@phenom:~/linux$ patch -p1 < /tmp/kk
> patching file drivers/block/xen-blkfront.c
> Hunk #1 FAILED at 44.
> Hunk #2 FAILED at 68.
> Hunk #3 FAILED at 105.
> Hunk #4 FAILED at 371.
> patch: **** malformed patch at line 172: ork)
>
>
> and idea why? Could you resent the patch as an attachment please?

Strange, I've used git send-email. I've attached it, but you can also
get them from:

http://xenbits.xen.org/gitweb/?p=people/royger/linux.git;a=shortlog;h=refs/heads/blk-for-3.9


Attachments:
0003-xen-blkfront-switch-from-llist-to-list.patch (5.93 kB)

2013-03-19 14:31:25

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 3/5] xen-blkfront: switch from llist to list

On Mon, Mar 18, 2013 at 05:49:34PM +0100, Roger Pau Monne wrote:
> Replace the use of llist with list.
>

I applied this patch, but I changed the git commit to be:


commit 155b7edb51430a280f86c1e21b7be308b0d219d4
Author: Roger Pau Monne <[email protected]>
Date: Mon Mar 18 17:49:34 2013 +0100

xen-blkfront: switch from llist to list

The git commit f84adf4921ae3115502f44ff467b04bf2f88cf04
(xen-blkfront: drop the use of llist_for_each_entry_safe)

was a stop-gate to fix a GCC4.1 bug. The appropiate way
is to actually use an list instead of using an llist.

As such this patch replaces the usage of llist with an
list.

Since we always manipulate the list while holding the io_lock, there's
no need for additional locking (llist used previously is safe to use
concurrently without additional locking).

Signed-off-by: Roger Pau Monn? <[email protected]>
CC: [email protected]
[v1: Redid the git commit description]
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9620644..97324cd1 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -44,7 +44,7 @@
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
-#include <linux/llist.h>
+#include <linux/list.h>

#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -68,7 +68,7 @@ enum blkif_state {
struct grant {
grant_ref_t gref;
unsigned long pfn;
- struct llist_node node;
+ struct list_head node;
};

struct blk_shadow {
@@ -105,7 +105,7 @@ struct blkfront_info
struct work_struct work;
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
- struct llist_head persistent_gnts;
+ struct list_head persistent_gnts;
unsigned int persistent_gnts_c;
unsigned long shadow_free;
unsigned int feature_flush;
@@ -371,10 +371,11 @@ static int blkif_queue_request(struct request *req)
lsect = fsect + (sg->length >> 9) - 1;

if (info->persistent_gnts_c) {
- BUG_ON(llist_empty(&info->persistent_gnts));
- gnt_list_entry = llist_entry(
- llist_del_first(&info->persistent_gnts),
- struct grant, node);
+ BUG_ON(list_empty(&info->persistent_gnts));
+ gnt_list_entry = list_first_entry(
+ &info->persistent_gnts,
+ struct grant, node);
+ list_del(&gnt_list_entry->node);

ref = gnt_list_entry->gref;
buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
@@ -790,9 +791,8 @@ static void blkif_restart_queue(struct work_struct *work)

static void blkif_free(struct blkfront_info *info, int suspend)
{
- struct llist_node *all_gnts;
- struct grant *persistent_gnt, *tmp;
- struct llist_node *n;
+ struct grant *persistent_gnt;
+ struct grant *n;

/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&info->io_lock);
@@ -804,20 +804,15 @@ static void blkif_free(struct blkfront_info *info, int suspend)

/* Remove all persistent grants */
if (info->persistent_gnts_c) {
- all_gnts = llist_del_all(&info->persistent_gnts);
- persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node);
- while (persistent_gnt) {
+ list_for_each_entry_safe(persistent_gnt, n,
+ &info->persistent_gnts, node) {
+ list_del(&persistent_gnt->node);
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
__free_page(pfn_to_page(persistent_gnt->pfn));
- tmp = persistent_gnt;
- n = persistent_gnt->node.next;
- if (n)
- persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node);
- else
- persistent_gnt = NULL;
- kfree(tmp);
+ kfree(persistent_gnt);
+ info->persistent_gnts_c--;
}
- info->persistent_gnts_c = 0;
+ BUG_ON(info->persistent_gnts_c != 0);
}

/* No more gnttab callback work. */
@@ -875,7 +870,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
}
/* Add the persistent grant into the list of free grants */
for (i = 0; i < s->req.u.rw.nr_segments; i++) {
- llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+ list_add(&s->grants_used[i]->node, &info->persistent_gnts);
info->persistent_gnts_c++;
}
}
@@ -1171,7 +1166,7 @@ static int blkfront_probe(struct xenbus_device *dev,
spin_lock_init(&info->io_lock);
info->xbdev = dev;
info->vdevice = vdevice;
- init_llist_head(&info->persistent_gnts);
+ INIT_LIST_HEAD(&info->persistent_gnts);
info->persistent_gnts_c = 0;
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);

2013-03-19 14:32:26

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 1/5] xen-blkback: don't store dev_bus_addr

On Mon, Mar 18, 2013 at 05:49:32PM +0100, Roger Pau Monne wrote:
> dev_bus_addr returned in the grant ref map operation is the mfn of the
> passed page, there's no need to store it in the persistent grant
> entry, since we can always get it provided that we have the page.
>
> This reduces the memory overhead of persistent grants in blkback.

I took this patch, but I redid it a bit:

commit 1d4cb410befdb8b373c6fad604b39e0200e0bee0
Author: Roger Pau Monne <[email protected]>
Date: Mon Mar 18 17:49:32 2013 +0100

xen-blkback: don't store dev_bus_addr

dev_bus_addr returned in the grant ref map operation is the mfn of the
passed page, there's no need to store it in the persistent grant
entry, since we can always get it provided that we have the page.

This reduces the memory overhead of persistent grants in blkback.

While at it, rename the 'seg[i].buf' to be 'seg[i].offset' as
it makes much more sense - as we use that value in bio_add_page
which as the fourth argument expects the offset.

We hadn't used the physical address as part of this at all.

Signed-off-by: Roger Pau Monn? <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: [email protected]
[v1: s/buf/offset/]
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 2cf8381..061c202 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg)
}

struct seg_buf {
- unsigned long buf;
+ unsigned long offset;
unsigned int nsec;
};
/*
@@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req,
* If this is a new persistent grant
* save the handler
*/
- persistent_gnts[i]->handle = map[j].handle;
- persistent_gnts[i]->dev_bus_addr =
- map[j++].dev_bus_addr;
+ persistent_gnts[i]->handle = map[j++].handle;
}
pending_handle(pending_req, i) =
persistent_gnts[i]->handle;

if (ret)
continue;
-
- seg[i].buf = persistent_gnts[i]->dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
} else {
- pending_handle(pending_req, i) = map[j].handle;
+ pending_handle(pending_req, i) = map[j++].handle;
bitmap_set(pending_req->unmap_seg, i, 1);

- if (ret) {
- j++;
+ if (ret)
continue;
- }
-
- seg[i].buf = map[j++].dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
}
+ seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
}
return ret;
}
@@ -971,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
(bio_add_page(bio,
pages[i],
seg[i].nsec << 9,
- seg[i].buf & ~PAGE_MASK) == 0)) {
+ seg[i].offset & ~PAGE_MASK) == 0)) {

bio = bio_alloc(GFP_KERNEL, nseg-i);
if (unlikely(bio == NULL))
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index da78346..60103e2 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -187,7 +187,6 @@ struct persistent_gnt {
struct page *page;
grant_ref_t gnt;
grant_handle_t handle;
- uint64_t dev_bus_addr;
struct rb_node node;
};

2013-03-19 14:56:03

by Jan Beulich

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH 1/5] xen-blkback: don't store dev_bus_addr

>>> On 19.03.13 at 15:32, Konrad Rzeszutek Wilk <[email protected]> wrote:
> On Mon, Mar 18, 2013 at 05:49:32PM +0100, Roger Pau Monne wrote:
>> dev_bus_addr returned in the grant ref map operation is the mfn of the
>> passed page, there's no need to store it in the persistent grant
>> entry, since we can always get it provided that we have the page.
>>
>> This reduces the memory overhead of persistent grants in blkback.
>
> I took this patch, but I redid it a bit:
>
> commit 1d4cb410befdb8b373c6fad604b39e0200e0bee0
> Author: Roger Pau Monne <[email protected]>
> Date: Mon Mar 18 17:49:32 2013 +0100
>
> xen-blkback: don't store dev_bus_addr
>
> dev_bus_addr returned in the grant ref map operation is the mfn of the
> passed page, there's no need to store it in the persistent grant
> entry, since we can always get it provided that we have the page.
>
> This reduces the memory overhead of persistent grants in blkback.
>
> While at it, rename the 'seg[i].buf' to be 'seg[i].offset' as
> it makes much more sense - as we use that value in bio_add_page
> which as the fourth argument expects the offset.
>
> We hadn't used the physical address as part of this at all.
>
> Signed-off-by: Roger Pau Monné <[email protected]>
> Cc: Konrad Rzeszutek Wilk <[email protected]>
> Cc: [email protected]
> [v1: s/buf/offset/]
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
>
> diff --git a/drivers/block/xen-blkback/blkback.c
> b/drivers/block/xen-blkback/blkback.c
> index 2cf8381..061c202 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg)
> }
>
> struct seg_buf {
> - unsigned long buf;
> + unsigned long offset;

If you touch this anyway, why don't you reduce the type to
"unsigned int", halving the overall structure size?

Even more, the field seems pointless to me altogether, since ...

> unsigned int nsec;
> };
> /*
> @@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req,
> * If this is a new persistent grant
> * save the handler
> */
> - persistent_gnts[i]->handle = map[j].handle;
> - persistent_gnts[i]->dev_bus_addr =
> - map[j++].dev_bus_addr;
> + persistent_gnts[i]->handle = map[j++].handle;
> }
> pending_handle(pending_req, i) =
> persistent_gnts[i]->handle;
>
> if (ret)
> continue;
> -
> - seg[i].buf = persistent_gnts[i]->dev_bus_addr |
> - (req->u.rw.seg[i].first_sect << 9);
> } else {
> - pending_handle(pending_req, i) = map[j].handle;
> + pending_handle(pending_req, i) = map[j++].handle;
> bitmap_set(pending_req->unmap_seg, i, 1);
>
> - if (ret) {
> - j++;
> + if (ret)
> continue;
> - }
> -
> - seg[i].buf = map[j++].dev_bus_addr |
> - (req->u.rw.seg[i].first_sect << 9);
> }
> + seg[i].offset = (req->u.rw.seg[i].first_sect << 9);

... this uses "i" as index on both sides, so ...

> }
> return ret;
> }
> @@ -971,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
> (bio_add_page(bio,
> pages[i],
> seg[i].nsec << 9,
> - seg[i].buf & ~PAGE_MASK) == 0)) {
> + seg[i].offset & ~PAGE_MASK) == 0)) {

... this one could as well use the original field.

And the masking with ~PAGE_MASK is not pointless in any case.

Jan

>
> bio = bio_alloc(GFP_KERNEL, nseg-i);
> if (unlikely(bio == NULL))
> diff --git a/drivers/block/xen-blkback/common.h
> b/drivers/block/xen-blkback/common.h
> index da78346..60103e2 100644
> --- a/drivers/block/xen-blkback/common.h
> +++ b/drivers/block/xen-blkback/common.h
> @@ -187,7 +187,6 @@ struct persistent_gnt {
> struct page *page;
> grant_ref_t gnt;
> grant_handle_t handle;
> - uint64_t dev_bus_addr;
> struct rb_node node;
> };
>
>
> _______________________________________________
> Xen-devel mailing list
> [email protected]
> http://lists.xen.org/xen-devel

2013-03-19 15:10:48

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH 1/5] xen-blkback: don't store dev_bus_addr

On Tue, Mar 19, 2013 at 02:55:56PM +0000, Jan Beulich wrote:
> >>> On 19.03.13 at 15:32, Konrad Rzeszutek Wilk <[email protected]> wrote:
> > On Mon, Mar 18, 2013 at 05:49:32PM +0100, Roger Pau Monne wrote:
> >> dev_bus_addr returned in the grant ref map operation is the mfn of the
> >> passed page, there's no need to store it in the persistent grant
> >> entry, since we can always get it provided that we have the page.
> >>
> >> This reduces the memory overhead of persistent grants in blkback.
> >
> > I took this patch, but I redid it a bit:
> >
> > commit 1d4cb410befdb8b373c6fad604b39e0200e0bee0
> > Author: Roger Pau Monne <[email protected]>
> > Date: Mon Mar 18 17:49:32 2013 +0100
> >
> > xen-blkback: don't store dev_bus_addr
> >
> > dev_bus_addr returned in the grant ref map operation is the mfn of the
> > passed page, there's no need to store it in the persistent grant
> > entry, since we can always get it provided that we have the page.
> >
> > This reduces the memory overhead of persistent grants in blkback.
> >
> > While at it, rename the 'seg[i].buf' to be 'seg[i].offset' as
> > it makes much more sense - as we use that value in bio_add_page
> > which as the fourth argument expects the offset.
> >
> > We hadn't used the physical address as part of this at all.
> >
> > Signed-off-by: Roger Pau Monn? <[email protected]>
> > Cc: Konrad Rzeszutek Wilk <[email protected]>
> > Cc: [email protected]
> > [v1: s/buf/offset/]
> > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> >
> > diff --git a/drivers/block/xen-blkback/blkback.c
> > b/drivers/block/xen-blkback/blkback.c
> > index 2cf8381..061c202 100644
> > --- a/drivers/block/xen-blkback/blkback.c
> > +++ b/drivers/block/xen-blkback/blkback.c
> > @@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg)
> > }
> >
> > struct seg_buf {
> > - unsigned long buf;
> > + unsigned long offset;
>
> If you touch this anyway, why don't you reduce the type to
> "unsigned int", halving the overall structure size?
>
> Even more, the field seems pointless to me altogether, since ...
>
> > unsigned int nsec;
> > };
> > /*
> > @@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req,
> > * If this is a new persistent grant
> > * save the handler
> > */
> > - persistent_gnts[i]->handle = map[j].handle;
> > - persistent_gnts[i]->dev_bus_addr =
> > - map[j++].dev_bus_addr;
> > + persistent_gnts[i]->handle = map[j++].handle;
> > }
> > pending_handle(pending_req, i) =
> > persistent_gnts[i]->handle;
> >
> > if (ret)
> > continue;
> > -
> > - seg[i].buf = persistent_gnts[i]->dev_bus_addr |
> > - (req->u.rw.seg[i].first_sect << 9);
> > } else {
> > - pending_handle(pending_req, i) = map[j].handle;
> > + pending_handle(pending_req, i) = map[j++].handle;
> > bitmap_set(pending_req->unmap_seg, i, 1);
> >
> > - if (ret) {
> > - j++;
> > + if (ret)
> > continue;
> > - }
> > -
> > - seg[i].buf = map[j++].dev_bus_addr |
> > - (req->u.rw.seg[i].first_sect << 9);
> > }
> > + seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
>
> ... this uses "i" as index on both sides, so ...
>
> > }
> > return ret;
> > }
> > @@ -971,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
> > (bio_add_page(bio,
> > pages[i],
> > seg[i].nsec << 9,
> > - seg[i].buf & ~PAGE_MASK) == 0)) {
> > + seg[i].offset & ~PAGE_MASK) == 0)) {
>
> ... this one could as well use the original field.
>
> And the masking with ~PAGE_MASK is not pointless in any case.

Good point. In which might as well make the 'struct seg_buf' be an
simple array of unsigned int.

>
> Jan
>
> >
> > bio = bio_alloc(GFP_KERNEL, nseg-i);
> > if (unlikely(bio == NULL))
> > diff --git a/drivers/block/xen-blkback/common.h
> > b/drivers/block/xen-blkback/common.h
> > index da78346..60103e2 100644
> > --- a/drivers/block/xen-blkback/common.h
> > +++ b/drivers/block/xen-blkback/common.h
> > @@ -187,7 +187,6 @@ struct persistent_gnt {
> > struct page *page;
> > grant_ref_t gnt;
> > grant_handle_t handle;
> > - uint64_t dev_bus_addr;
> > struct rb_node node;
> > };
> >
> >
> > _______________________________________________
> > Xen-devel mailing list
> > [email protected]
> > http://lists.xen.org/xen-devel
>
>

2013-03-19 15:24:13

by Jan Beulich

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH 1/5] xen-blkback: don't store dev_bus_addr

>>> On 19.03.13 at 16:10, Konrad Rzeszutek Wilk <[email protected]> wrote:
> On Tue, Mar 19, 2013 at 02:55:56PM +0000, Jan Beulich wrote:
>> >>> On 19.03.13 at 15:32, Konrad Rzeszutek Wilk <[email protected]> wrote:
>> > @@ -971,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>> > (bio_add_page(bio,
>> > pages[i],
>> > seg[i].nsec << 9,
>> > - seg[i].buf & ~PAGE_MASK) == 0)) {
>> > + seg[i].offset & ~PAGE_MASK) == 0)) {
>>
>> ... this one could as well use the original field.
>>
>> And the masking with ~PAGE_MASK is not pointless in any case.
>
> Good point. In which might as well make the 'struct seg_buf' be an
> simple array of unsigned int.

Looks like you understood that the "not" in my earlier response
was meant to be "now".

Jan

2013-03-19 15:26:10

by Roger Pau Monne

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH 1/5] xen-blkback: don't store dev_bus_addr

On 19/03/13 16:10, Konrad Rzeszutek Wilk wrote:
> On Tue, Mar 19, 2013 at 02:55:56PM +0000, Jan Beulich wrote:
>>>>> On 19.03.13 at 15:32, Konrad Rzeszutek Wilk <[email protected]> wrote:
>>> On Mon, Mar 18, 2013 at 05:49:32PM +0100, Roger Pau Monne wrote:
>>>> dev_bus_addr returned in the grant ref map operation is the mfn of the
>>>> passed page, there's no need to store it in the persistent grant
>>>> entry, since we can always get it provided that we have the page.
>>>>
>>>> This reduces the memory overhead of persistent grants in blkback.
>>>
>>> I took this patch, but I redid it a bit:
>>>
>>> commit 1d4cb410befdb8b373c6fad604b39e0200e0bee0
>>> Author: Roger Pau Monne <[email protected]>
>>> Date: Mon Mar 18 17:49:32 2013 +0100
>>>
>>> xen-blkback: don't store dev_bus_addr
>>>
>>> dev_bus_addr returned in the grant ref map operation is the mfn of the
>>> passed page, there's no need to store it in the persistent grant
>>> entry, since we can always get it provided that we have the page.
>>>
>>> This reduces the memory overhead of persistent grants in blkback.
>>>
>>> While at it, rename the 'seg[i].buf' to be 'seg[i].offset' as
>>> it makes much more sense - as we use that value in bio_add_page
>>> which as the fourth argument expects the offset.
>>>
>>> We hadn't used the physical address as part of this at all.
>>>
>>> Signed-off-by: Roger Pau Monn? <[email protected]>
>>> Cc: Konrad Rzeszutek Wilk <[email protected]>
>>> Cc: [email protected]
>>> [v1: s/buf/offset/]
>>> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
>>>
>>> diff --git a/drivers/block/xen-blkback/blkback.c
>>> b/drivers/block/xen-blkback/blkback.c
>>> index 2cf8381..061c202 100644
>>> --- a/drivers/block/xen-blkback/blkback.c
>>> +++ b/drivers/block/xen-blkback/blkback.c
>>> @@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg)
>>> }
>>>
>>> struct seg_buf {
>>> - unsigned long buf;
>>> + unsigned long offset;
>>
>> If you touch this anyway, why don't you reduce the type to
>> "unsigned int", halving the overall structure size?
>>
>> Even more, the field seems pointless to me altogether, since ...
>>
>>> unsigned int nsec;
>>> };
>>> /*
>>> @@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req,
>>> * If this is a new persistent grant
>>> * save the handler
>>> */
>>> - persistent_gnts[i]->handle = map[j].handle;
>>> - persistent_gnts[i]->dev_bus_addr =
>>> - map[j++].dev_bus_addr;
>>> + persistent_gnts[i]->handle = map[j++].handle;
>>> }
>>> pending_handle(pending_req, i) =
>>> persistent_gnts[i]->handle;
>>>
>>> if (ret)
>>> continue;
>>> -
>>> - seg[i].buf = persistent_gnts[i]->dev_bus_addr |
>>> - (req->u.rw.seg[i].first_sect << 9);
>>> } else {
>>> - pending_handle(pending_req, i) = map[j].handle;
>>> + pending_handle(pending_req, i) = map[j++].handle;
>>> bitmap_set(pending_req->unmap_seg, i, 1);
>>>
>>> - if (ret) {
>>> - j++;
>>> + if (ret)
>>> continue;
>>> - }
>>> -
>>> - seg[i].buf = map[j++].dev_bus_addr |
>>> - (req->u.rw.seg[i].first_sect << 9);
>>> }
>>> + seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
>>
>> ... this uses "i" as index on both sides, so ...
>>
>>> }
>>> return ret;
>>> }
>>> @@ -971,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
>>> (bio_add_page(bio,
>>> pages[i],
>>> seg[i].nsec << 9,
>>> - seg[i].buf & ~PAGE_MASK) == 0)) {
>>> + seg[i].offset & ~PAGE_MASK) == 0)) {
>>
>> ... this one could as well use the original field.
>>
>> And the masking with ~PAGE_MASK is not pointless in any case.
>
> Good point. In which might as well make the 'struct seg_buf' be an
> simple array of unsigned int.

I didn't do this because later on (when using indirect segments) I need
a place to store the offset, since I don't have the indirect segment
pages mapped during the whole operation, but I guess I could change that.

>>
>> Jan
>>
>>>
>>> bio = bio_alloc(GFP_KERNEL, nseg-i);
>>> if (unlikely(bio == NULL))
>>> diff --git a/drivers/block/xen-blkback/common.h
>>> b/drivers/block/xen-blkback/common.h
>>> index da78346..60103e2 100644
>>> --- a/drivers/block/xen-blkback/common.h
>>> +++ b/drivers/block/xen-blkback/common.h
>>> @@ -187,7 +187,6 @@ struct persistent_gnt {
>>> struct page *page;
>>> grant_ref_t gnt;
>>> grant_handle_t handle;
>>> - uint64_t dev_bus_addr;
>>> struct rb_node node;
>>> };
>>>
>>>
>>> _______________________________________________
>>> Xen-devel mailing list
>>> [email protected]
>>> http://lists.xen.org/xen-devel
>>
>>

2013-03-19 16:57:12

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH 1/5] xen-blkback: don't store dev_bus_addr

> >> And the masking with ~PAGE_MASK is not pointless in any case.
> >
> > Good point. In which might as well make the 'struct seg_buf' be an
> > simple array of unsigned int.
>
> I didn't do this because later on (when using indirect segments) I need
> a place to store the offset, since I don't have the indirect segment
> pages mapped during the whole operation, but I guess I could change that.

Ah, lets leave it then, and just do:

commit ffb1dabd1eb10c76a1e7af62f75a1aaa8d590b5a
Author: Roger Pau Monne <[email protected]>
Date: Mon Mar 18 17:49:32 2013 +0100

xen-blkback: don't store dev_bus_addr

dev_bus_addr returned in the grant ref map operation is the mfn of the
passed page, there's no need to store it in the persistent grant
entry, since we can always get it provided that we have the page.

This reduces the memory overhead of persistent grants in blkback.

While at it, rename the 'seg[i].buf' to be 'seg[i].offset' as
it makes much more sense - as we use that value in bio_add_page
which as the fourth argument expects the offset.

We hadn't used the physical address as part of this at all.

Signed-off-by: Roger Pau Monn? <[email protected]>
Cc: Konrad Rzeszutek Wilk <[email protected]>
Cc: [email protected]
[v1: s/buf/offset/]
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 2cf8381..dd5b2fe 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -442,7 +442,7 @@ int xen_blkif_schedule(void *arg)
}

struct seg_buf {
- unsigned long buf;
+ unsigned int offset;
unsigned int nsec;
};
/*
@@ -621,30 +621,21 @@ static int xen_blkbk_map(struct blkif_request *req,
* If this is a new persistent grant
* save the handler
*/
- persistent_gnts[i]->handle = map[j].handle;
- persistent_gnts[i]->dev_bus_addr =
- map[j++].dev_bus_addr;
+ persistent_gnts[i]->handle = map[j++].handle;
}
pending_handle(pending_req, i) =
persistent_gnts[i]->handle;

if (ret)
continue;
-
- seg[i].buf = persistent_gnts[i]->dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
} else {
- pending_handle(pending_req, i) = map[j].handle;
+ pending_handle(pending_req, i) = map[j++].handle;
bitmap_set(pending_req->unmap_seg, i, 1);

- if (ret) {
- j++;
+ if (ret)
continue;
- }
-
- seg[i].buf = map[j++].dev_bus_addr |
- (req->u.rw.seg[i].first_sect << 9);
}
+ seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
}
return ret;
}
@@ -971,7 +962,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
(bio_add_page(bio,
pages[i],
seg[i].nsec << 9,
- seg[i].buf & ~PAGE_MASK) == 0)) {
+ seg[i].offset) == 0)) {

bio = bio_alloc(GFP_KERNEL, nseg-i);
if (unlikely(bio == NULL))
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index da78346..60103e2 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -187,7 +187,6 @@ struct persistent_gnt {
struct page *page;
grant_ref_t gnt;
grant_handle_t handle;
- uint64_t dev_bus_addr;
struct rb_node node;
};