These two patches allow userspace to provide an expected diskseq of a
block device and discover when blkback has opened the device. Together,
these features allow using blkback with delete-on-close block devices,
such as loop devices with autoclear set.
Demi Marie Obenour (2):
xen-blkback: Implement diskseq checks
xen-blkback: Inform userspace that device has been opened
drivers/block/xen-blkback/xenbus.c | 147 ++++++++++++++++++++++++-----
1 file changed, 124 insertions(+), 23 deletions(-)
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
Set "opened" to "0" before the hotplug script is called. Once the
device node has been opened, set "opened" to "1".
"opened" is used exclusively by userspace. It serves two purposes:
1. It tells userspace that the diskseq Xenstore entry is supported.
2. It tells userspace that it can wait for "opened" to be set to 1.
Once "opened" is 1, blkback has a reference to the device, so
userspace doesn't need to keep one.
Together, these changes allow userspace to use block devices with
delete-on-close behavior, such as loop devices with the autoclear flag
set.
Signed-off-by: Demi Marie Obenour <[email protected]>
---
drivers/block/xen-blkback/xenbus.c | 35 ++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 9c3eb148fbd802c74e626c3d7bcd69dcb09bd921..519a78aa9073d1faa1dce5c1b36e95ae58da534b 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -3,6 +3,20 @@
Copyright (C) 2005 Rusty Russell <[email protected]>
Copyright (C) 2005 XenSource Ltd
+In addition to the Xenstore nodes required by the Xen block device
+specification, this implementation of blkback uses a new Xenstore
+node: "opened". blkback sets "opened" to "0" before the hotplug script
+is called. Once the device node has been opened, blkback sets "opened"
+to "1".
+
+"opened" is read exclusively by userspace. It serves two purposes:
+
+1. It tells userspace that the "diskseq" Xenstore node is supported, so an
+ expected disk sequence number can be provided for the device.
+
+2. It tells userspace that it can wait for "opened" to be set to 1 after writing
+ "physical-device". Once "opened" is 1, blkback has a reference to the
+ device, so userspace doesn't need to keep one.
*/
@@ -699,6 +713,14 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
if (err)
pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
+ /*
+ * This informs userspace that the "opened" node will be set to "1" when
+ * the device has been opened successfully.
+ */
+ err = xenbus_write(XBT_NIL, dev->nodename, "opened", "0");
+ if (err)
+ goto fail;
+
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
@@ -826,6 +848,19 @@ static void backend_changed(struct xenbus_watch *watch,
goto fail;
}
+ /*
+ * Tell userspace that the device has been opened and that blkback has a
+ * reference to it. Userspace can then close the device or mark it as
+ * delete-on-close, knowing that blkback will keep the device open as
+ * long as necessary.
+ */
+ err = xenbus_write(XBT_NIL, dev->nodename, "opened", "1");
+ if (err) {
+ xenbus_dev_fatal(dev, err, "%s: notifying userspace device has been opened",
+ dev->nodename);
+ goto free_vbd;
+ }
+
err = xenvbd_sysfs_addif(dev);
if (err) {
xenbus_dev_fatal(dev, err, "creating sysfs entries");
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
This allows specifying a disk sequence number in XenStore. If it does
not match the disk sequence number of the underlying device, the device
will not be exported and a warning will be logged. Userspace can use
this to eliminate race conditions due to major/minor number reuse.
Old kernels do not support the new "diskseq" Xenstore node, but a later
patch will allow userspace to discover that the node is supported.
Signed-off-by: Demi Marie Obenour <[email protected]>
---
drivers/block/xen-blkback/xenbus.c | 112 +++++++++++++++++++++++------
1 file changed, 89 insertions(+), 23 deletions(-)
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 4807af1d58059394d7a992335dabaf2bc3901721..9c3eb148fbd802c74e626c3d7bcd69dcb09bd921 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -24,6 +24,7 @@ struct backend_info {
struct xenbus_watch backend_watch;
unsigned major;
unsigned minor;
+ unsigned long long diskseq;
char *mode;
};
@@ -479,7 +480,7 @@ static void xen_vbd_free(struct xen_vbd *vbd)
static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
unsigned major, unsigned minor, int readonly,
- int cdrom)
+ bool cdrom, u64 diskseq)
{
struct xen_vbd *vbd;
struct block_device *bdev;
@@ -507,6 +508,26 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
xen_vbd_free(vbd);
return -ENOENT;
}
+
+ if (diskseq) {
+ struct gendisk *disk = bdev->bd_disk;
+
+ if (unlikely(disk == NULL)) {
+ pr_err("%s: device %08x has no gendisk\n",
+ __func__, vbd->pdevice);
+ xen_vbd_free(vbd);
+ return -EFAULT;
+ }
+
+ if (unlikely(disk->diskseq != diskseq)) {
+ pr_warn("%s: device %08x has incorrect sequence "
+ "number 0x%llx (expected 0x%llx)\n",
+ __func__, vbd->pdevice, disk->diskseq, diskseq);
+ xen_vbd_free(vbd);
+ return -ENODEV;
+ }
+ }
+
vbd->size = vbd_sz(vbd);
if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
@@ -707,6 +728,9 @@ static void backend_changed(struct xenbus_watch *watch,
int cdrom = 0;
unsigned long handle;
char *device_type;
+ char *diskseq_str = NULL;
+ int diskseq_len;
+ unsigned long long diskseq;
pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
@@ -725,10 +749,46 @@ static void backend_changed(struct xenbus_watch *watch,
return;
}
- if (be->major | be->minor) {
- if (be->major != major || be->minor != minor)
- pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
- be->major, be->minor, major, minor);
+ diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);
+ if (IS_ERR(diskseq_str)) {
+ int err = PTR_ERR(diskseq_str);
+ diskseq_str = NULL;
+
+ /*
+ * If this does not exist, it means legacy userspace that does not
+ * support diskseq.
+ */
+ if (unlikely(!XENBUS_EXIST_ERR(err))) {
+ xenbus_dev_fatal(dev, err, "reading diskseq");
+ return;
+ }
+ diskseq = 0;
+ } else if (diskseq_len <= 0) {
+ xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
+ goto fail;
+ } else if (diskseq_len > 16) {
+ xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
+ diskseq_len);
+ goto fail;
+ } else if (diskseq_str[0] == '0') {
+ xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
+ goto fail;
+ } else {
+ char *diskseq_end;
+ diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
+ if (diskseq_end != diskseq_str + diskseq_len) {
+ xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
+ goto fail;
+ }
+ kfree(diskseq_str);
+ diskseq_str = NULL;
+ }
+
+ if (be->major | be->minor | be->diskseq) {
+ if (be->major != major || be->minor != minor || be->diskseq != diskseq)
+ pr_warn("changing physical device (from %x:%x:%llx to %x:%x:%llx)"
+ " not supported.\n",
+ be->major, be->minor, be->diskseq, major, minor, diskseq);
return;
}
@@ -756,29 +816,35 @@ static void backend_changed(struct xenbus_watch *watch,
be->major = major;
be->minor = minor;
+ be->diskseq = diskseq;
err = xen_vbd_create(be->blkif, handle, major, minor,
- !strchr(be->mode, 'w'), cdrom);
-
- if (err)
- xenbus_dev_fatal(dev, err, "creating vbd structure");
- else {
- err = xenvbd_sysfs_addif(dev);
- if (err) {
- xen_vbd_free(&be->blkif->vbd);
- xenbus_dev_fatal(dev, err, "creating sysfs entries");
- }
- }
+ !strchr(be->mode, 'w'), cdrom, diskseq);
if (err) {
- kfree(be->mode);
- be->mode = NULL;
- be->major = 0;
- be->minor = 0;
- } else {
- /* We're potentially connected now */
- xen_update_blkif_status(be->blkif);
+ xenbus_dev_fatal(dev, err, "creating vbd structure");
+ goto fail;
}
+
+ err = xenvbd_sysfs_addif(dev);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "creating sysfs entries");
+ goto free_vbd;
+ }
+
+ /* We're potentially connected now */
+ xen_update_blkif_status(be->blkif);
+ return;
+
+free_vbd:
+ xen_vbd_free(&be->blkif->vbd);
+fail:
+ kfree(diskseq_str);
+ kfree(be->mode);
+ be->mode = NULL;
+ be->major = 0;
+ be->minor = 0;
+ be->diskseq = 0;
}
/*
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
> --- a/drivers/block/xen-blkback/xenbus.c
> +++ b/drivers/block/xen-blkback/xenbus.c
> @@ -3,6 +3,20 @@
> Copyright (C) 2005 Rusty Russell <[email protected]>
> Copyright (C) 2005 XenSource Ltd
>
> +In addition to the Xenstore nodes required by the Xen block device
> +specification, this implementation of blkback uses a new Xenstore
> +node: "opened". blkback sets "opened" to "0" before the hotplug script
> +is called. Once the device node has been opened, blkback sets "opened"
> +to "1".
This is a really odd comment style, and a really strange place for it.
To me it feels like this should just be a file in Documentation as it
relates to how to use the driver, and doesn't really explain the code.
On Thu, Jun 01, 2023 at 05:48:22PM -0400, Demi Marie Obenour wrote:
> + if (diskseq) {
> + struct gendisk *disk = bdev->bd_disk;
> +
> + if (unlikely(disk == NULL)) {
> + pr_err("%s: device %08x has no gendisk\n",
> + __func__, vbd->pdevice);
> + xen_vbd_free(vbd);
> + return -EFAULT;
> + }
bdev->bd_disk is never NULL.
> + diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);
Please avoid the overly long line.
> + if (IS_ERR(diskseq_str)) {
> + int err = PTR_ERR(diskseq_str);
> + diskseq_str = NULL;
> +
> + /*
> + * If this does not exist, it means legacy userspace that does not
.. even more so in comments.
> + * support diskseq.
> + */
> + if (unlikely(!XENBUS_EXIST_ERR(err))) {
> + xenbus_dev_fatal(dev, err, "reading diskseq");
> + return;
> + }
> + diskseq = 0;
> + } else if (diskseq_len <= 0) {
> + xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
> + goto fail;
> + } else if (diskseq_len > 16) {
No need for an else after a return.
> + xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
> + diskseq_len);
> + goto fail;
> + } else if (diskseq_str[0] == '0') {
> + xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
> + goto fail;
> + } else {
> + char *diskseq_end;
> + diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
> + if (diskseq_end != diskseq_str + diskseq_len) {
> + xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
> + goto fail;
> + }
> + kfree(diskseq_str);
> + diskseq_str = NULL;
> + }
And I suspect the code will be a lot easier to follow if you move
the diskseq validation into a separate helper.
On Wed, Jun 07, 2023 at 12:46:16AM -0700, Christoph Hellwig wrote:
> > --- a/drivers/block/xen-blkback/xenbus.c
> > +++ b/drivers/block/xen-blkback/xenbus.c
> > @@ -3,6 +3,20 @@
> > Copyright (C) 2005 Rusty Russell <[email protected]>
> > Copyright (C) 2005 XenSource Ltd
> >
> > +In addition to the Xenstore nodes required by the Xen block device
> > +specification, this implementation of blkback uses a new Xenstore
> > +node: "opened". blkback sets "opened" to "0" before the hotplug script
> > +is called. Once the device node has been opened, blkback sets "opened"
> > +to "1".
>
> This is a really odd comment style, and a really strange place for it.
> To me it feels like this should just be a file in Documentation as it
> relates to how to use the driver, and doesn't really explain the code.
Will fix in v3.
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
On Wed, Jun 07, 2023 at 12:44:22AM -0700, Christoph Hellwig wrote:
> On Thu, Jun 01, 2023 at 05:48:22PM -0400, Demi Marie Obenour wrote:
> > + if (diskseq) {
> > + struct gendisk *disk = bdev->bd_disk;
> > +
> > + if (unlikely(disk == NULL)) {
> > + pr_err("%s: device %08x has no gendisk\n",
> > + __func__, vbd->pdevice);
> > + xen_vbd_free(vbd);
> > + return -EFAULT;
> > + }
>
> bdev->bd_disk is never NULL.
Fixed in v3.
> > + diskseq_str = xenbus_read(XBT_NIL, dev->nodename, "diskseq", &diskseq_len);
>
> Please avoid the overly long line.
Fixed in v3.
> > + if (IS_ERR(diskseq_str)) {
> > + int err = PTR_ERR(diskseq_str);
> > + diskseq_str = NULL;
> > +
> > + /*
> > + * If this does not exist, it means legacy userspace that does not
>
> .. even more so in comments.
Fixed in v3.
> > + * support diskseq.
> > + */
> > + if (unlikely(!XENBUS_EXIST_ERR(err))) {
> > + xenbus_dev_fatal(dev, err, "reading diskseq");
> > + return;
> > + }
> > + diskseq = 0;
> > + } else if (diskseq_len <= 0) {
> > + xenbus_dev_fatal(dev, -EFAULT, "diskseq must not be empty");
> > + goto fail;
> > + } else if (diskseq_len > 16) {
>
> No need for an else after a return.
Fixed in v3.
> > + xenbus_dev_fatal(dev, -ERANGE, "diskseq too long: got %d but limit is 16",
> > + diskseq_len);
> > + goto fail;
> > + } else if (diskseq_str[0] == '0') {
> > + xenbus_dev_fatal(dev, -ERANGE, "diskseq must not start with '0'");
> > + goto fail;
> > + } else {
> > + char *diskseq_end;
> > + diskseq = simple_strtoull(diskseq_str, &diskseq_end, 16);
> > + if (diskseq_end != diskseq_str + diskseq_len) {
> > + xenbus_dev_fatal(dev, -EINVAL, "invalid diskseq");
> > + goto fail;
> > + }
> > + kfree(diskseq_str);
> > + diskseq_str = NULL;
> > + }
>
> And I suspect the code will be a lot easier to follow if you move
> the diskseq validation into a separate helper.
Fixed in v3.
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab