2023-06-12 07:52:10

by Jingbo Xu

[permalink] [raw]
Subject: [PATCH v2] block: fine-granular CAP_SYS_ADMIN for Persistent Reservation ioctl

Allow of unprivileged Persistent Reservation (PR) operations on devices
if the write permission check on the device node has passed.

Besides, refuse the unprivileged PR operations on partitions as
reservations on partitions doesn't make sense.

Signed-off-by: Jingbo Xu <[email protected]>
---
changes since RFC:
- only allow unprivileged reservations if the file descriptor is open
for write (Christoph Hellwig)
- refuse the unprivileged reservations on partitions (Christoph Hellwig)
(maybe this checking shall also be done when CAP_SYS_ADMIN is set?)

RFC: https://lore.kernel.org/all/[email protected]/
---
block/ioctl.c | 48 ++++++++++++++++++++++++++++++++----------------
1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/block/ioctl.c b/block/ioctl.c
index 9c5f637ff153..420dc4701f9c 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -254,13 +254,29 @@ int blkdev_compat_ptr_ioctl(struct block_device *bdev, fmode_t mode,
EXPORT_SYMBOL(blkdev_compat_ptr_ioctl);
#endif

-static int blkdev_pr_register(struct block_device *bdev,
+static bool blkdev_pr_allowed(struct block_device *bdev, fmode_t mode)
+{
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+
+ /* no sense to make reservations for partitions */
+ if (bdev_is_partition(bdev))
+ return false;
+
+ /*
+ * Only allow unprivileged reservations if the file descriptor is open
+ * for writing.
+ */
+ return mode & FMODE_WRITE;
+}
+
+static int blkdev_pr_register(struct block_device *bdev, fmode_t mode,
struct pr_registration __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_registration reg;

- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_register)
return -EOPNOTSUPP;
@@ -272,13 +288,13 @@ static int blkdev_pr_register(struct block_device *bdev,
return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
}

-static int blkdev_pr_reserve(struct block_device *bdev,
+static int blkdev_pr_reserve(struct block_device *bdev, fmode_t mode,
struct pr_reservation __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;

- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_reserve)
return -EOPNOTSUPP;
@@ -290,13 +306,13 @@ static int blkdev_pr_reserve(struct block_device *bdev,
return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
}

-static int blkdev_pr_release(struct block_device *bdev,
+static int blkdev_pr_release(struct block_device *bdev, fmode_t mode,
struct pr_reservation __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_reservation rsv;

- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_release)
return -EOPNOTSUPP;
@@ -308,13 +324,13 @@ static int blkdev_pr_release(struct block_device *bdev,
return ops->pr_release(bdev, rsv.key, rsv.type);
}

-static int blkdev_pr_preempt(struct block_device *bdev,
+static int blkdev_pr_preempt(struct block_device *bdev, fmode_t mode,
struct pr_preempt __user *arg, bool abort)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_preempt p;

- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_preempt)
return -EOPNOTSUPP;
@@ -326,13 +342,13 @@ static int blkdev_pr_preempt(struct block_device *bdev,
return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
}

-static int blkdev_pr_clear(struct block_device *bdev,
+static int blkdev_pr_clear(struct block_device *bdev, fmode_t mode,
struct pr_clear __user *arg)
{
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
struct pr_clear c;

- if (!capable(CAP_SYS_ADMIN))
+ if (!blkdev_pr_allowed(bdev, mode))
return -EPERM;
if (!ops || !ops->pr_clear)
return -EOPNOTSUPP;
@@ -534,17 +550,17 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
case BLKTRACETEARDOWN:
return blk_trace_ioctl(bdev, cmd, argp);
case IOC_PR_REGISTER:
- return blkdev_pr_register(bdev, argp);
+ return blkdev_pr_register(bdev, mode, argp);
case IOC_PR_RESERVE:
- return blkdev_pr_reserve(bdev, argp);
+ return blkdev_pr_reserve(bdev, mode, argp);
case IOC_PR_RELEASE:
- return blkdev_pr_release(bdev, argp);
+ return blkdev_pr_release(bdev, mode, argp);
case IOC_PR_PREEMPT:
- return blkdev_pr_preempt(bdev, argp, false);
+ return blkdev_pr_preempt(bdev, mode, argp, false);
case IOC_PR_PREEMPT_ABORT:
- return blkdev_pr_preempt(bdev, argp, true);
+ return blkdev_pr_preempt(bdev, mode, argp, true);
case IOC_PR_CLEAR:
- return blkdev_pr_clear(bdev, argp);
+ return blkdev_pr_clear(bdev, mode, argp);
default:
return -ENOIOCTLCMD;
}
--
2.19.1.6.gb485710b



2023-06-13 05:12:04

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v2] block: fine-granular CAP_SYS_ADMIN for Persistent Reservation ioctl

> +static bool blkdev_pr_allowed(struct block_device *bdev, fmode_t mode)

With my resent series the use of fmode_t in the block layer has been
replaced with a new blk_mode_t, so you'll need to rebase.

> +{
> + if (capable(CAP_SYS_ADMIN))
> + return true;
> +
> + /* no sense to make reservations for partitions */
> + if (bdev_is_partition(bdev))
> + return false;

I think we should disallow the PR ioctls for partitions entirely.
Yes, that's a change of behavior (and should be a separate patch),
but this is the right time to fix it.

Otherwise this change log good, thanks a lot!


2023-06-13 06:42:44

by Jingbo Xu

[permalink] [raw]
Subject: Re: [PATCH v2] block: fine-granular CAP_SYS_ADMIN for Persistent Reservation ioctl



On 6/13/23 1:02 PM, Christoph Hellwig wrote:
>> +static bool blkdev_pr_allowed(struct block_device *bdev, fmode_t mode)
>
> With my resent series the use of fmode_t in the block layer has been
> replaced with a new blk_mode_t, so you'll need to rebase.

Okay I will rebase on that in the next version.

>
>> +{
>> + if (capable(CAP_SYS_ADMIN))
>> + return true;
>> +
>> + /* no sense to make reservations for partitions */
>> + if (bdev_is_partition(bdev))
>> + return false;
>
> I think we should disallow the PR ioctls for partitions entirely.
> Yes, that's a change of behavior (and should be a separate patch),
> but this is the right time to fix it.

Okay I will make it into a separate patch later.


> Otherwise this change log good, thanks a lot!

Thanks for the comment and suggestions!


--
Thanks,
Jingbo