2021-05-21 20:28:26

by Anthony Krowiak

[permalink] [raw]
Subject: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

The function pointer to the handler that processes interception of the
PQAP instruction is contained in the mdev. If the mdev is removed and
its storage de-allocated during the processing of the PQAP instruction,
the function pointer could get wiped out before the function is called
because there is currently nothing that controls access to it.

This patch introduces two new functions:
* The kvm_arch_crypto_register_hook() function registers a function pointer
for processing intercepted crypto instructions.
* The kvm_arch_crypto_register_hook() function un-registers a function
pointer that was previously registered.

Registration and unregistration of function pointers is protected by a
mutex lock. This lock is also employed when the hook is retrieved and the
hook function is called to protect against losing access to the function
while an intercepted crypto instruction is being processed.

The PQAP instruction handler function pointer is registered at the time
the vfio_ap module is loaded and unregistered when it is unloaded; so,
the lifespan of the function pointer is concurrent with the lifespan of
the vfio_ap module.

Signed-off-by: Tony Krowiak <[email protected]>
---
arch/s390/include/asm/kvm_host.h | 13 +++--
arch/s390/kvm/priv.c | 70 +++++++++++++++++++++++++--
drivers/s390/crypto/vfio_ap_ops.c | 37 +++++++++++---
drivers/s390/crypto/vfio_ap_private.h | 1 -
4 files changed, 105 insertions(+), 16 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8925f3969478..d59b9309a6b8 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -803,14 +803,19 @@ struct kvm_s390_cpu_model {
unsigned short ibc;
};

-struct kvm_s390_module_hook {
- int (*hook)(struct kvm_vcpu *vcpu);
+enum kvm_s390_crypto_hook_type {
+ PQAP_HOOK
+};
+
+struct kvm_s390_crypto_hook {
+ enum kvm_s390_crypto_hook_type type;
struct module *owner;
+ int (*hook_fcn)(struct kvm_vcpu *vcpu);
+ struct list_head node;
};

struct kvm_s390_crypto {
struct kvm_s390_crypto_cb *crycb;
- struct kvm_s390_module_hook *pqap_hook;
__u32 crycbd;
__u8 aes_kw;
__u8 dea_kw;
@@ -996,6 +1001,8 @@ static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {}
void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
+extern int kvm_arch_crypto_register_hook(struct kvm_s390_crypto_hook *hook);
+extern int kvm_arch_crypto_unregister_hook(struct kvm_s390_crypto_hook *hook);

extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 9928f785c677..1221c04f6f6a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -12,6 +12,7 @@
#include <linux/gfp.h>
#include <linux/errno.h>
#include <linux/compat.h>
+#include <linux/list.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>

@@ -31,6 +32,63 @@
#include "kvm-s390.h"
#include "trace.h"

+DEFINE_MUTEX(crypto_hooks_lock);
+static struct list_head crypto_hooks = { &(crypto_hooks), &(crypto_hooks) };
+
+static struct kvm_s390_crypto_hook
+*kvm_arch_crypto_find_hook(enum kvm_s390_crypto_hook_type type)
+{
+ struct kvm_s390_crypto_hook *crypto_hook;
+
+ list_for_each_entry(crypto_hook, &crypto_hooks, node) {
+ if (crypto_hook->type == type)
+ return crypto_hook;
+ }
+
+ return NULL;
+}
+
+int kvm_arch_crypto_register_hook(struct kvm_s390_crypto_hook *hook)
+{
+ struct kvm_s390_crypto_hook *crypto_hook;
+
+ mutex_lock(&crypto_hooks_lock);
+ crypto_hook = kvm_arch_crypto_find_hook(hook->type);
+ if (crypto_hook) {
+ if (crypto_hook->owner != hook->owner)
+ return -EACCES;
+ list_replace(&crypto_hook->node, &hook->node);
+ } else {
+ list_add(&hook->node, &crypto_hooks);
+ }
+ mutex_unlock(&crypto_hooks_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_register_hook);
+
+int kvm_arch_crypto_unregister_hook(struct kvm_s390_crypto_hook *hook)
+{
+ struct kvm_s390_crypto_hook *crypto_hook;
+
+ mutex_lock(&crypto_hooks_lock);
+ crypto_hook = kvm_arch_crypto_find_hook(hook->type);
+
+ if (crypto_hook) {
+ if (crypto_hook->owner != hook->owner)
+ return -EACCES;
+ if (crypto_hook->hook_fcn != hook->hook_fcn)
+ return -EFAULT;
+ list_del(&crypto_hook->node);
+ } else {
+ return -ENODEV;
+ }
+ mutex_unlock(&crypto_hooks_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_arch_crypto_unregister_hook);
+
static int handle_ri(struct kvm_vcpu *vcpu)
{
vcpu->stat.instruction_ri++;
@@ -610,6 +668,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
static int handle_pqap(struct kvm_vcpu *vcpu)
{
struct ap_queue_status status = {};
+ struct kvm_s390_crypto_hook *pqap_hook;
unsigned long reg0;
int ret;
uint8_t fc;
@@ -657,15 +716,16 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
* Verify that the hook callback is registered, lock the owner
* and call the hook.
*/
- if (vcpu->kvm->arch.crypto.pqap_hook) {
- if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
- return -EOPNOTSUPP;
- ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
- module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
+ mutex_lock(&crypto_hooks_lock);
+ pqap_hook = kvm_arch_crypto_find_hook(PQAP_HOOK);
+ if (pqap_hook) {
+ ret = pqap_hook->hook_fcn(vcpu);
if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
kvm_s390_set_psw_cc(vcpu, 3);
+ mutex_unlock(&crypto_hooks_lock);
return ret;
}
+ mutex_unlock(&crypto_hooks_lock);
/*
* A vfio_driver must register a hook.
* No hook means no driver to enable the SIE CRYCB and no queues.
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index f90c9103dac2..3466ceffc46a 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -64,6 +64,21 @@ static struct vfio_ap_queue *vfio_ap_get_queue(
return q;
}

+static struct ap_matrix_mdev *vfio_ap_find_mdev_for_apqn(int apqn)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+ unsigned long apid = AP_QID_CARD(apqn);
+ unsigned long apqi = AP_QID_QUEUE(apqn);
+
+ list_for_each_entry(matrix_mdev, &matrix_dev->mdev_list, node) {
+ if (test_bit_inv(apid, matrix_mdev->matrix.apm) &&
+ test_bit_inv(apqi, matrix_mdev->matrix.aqm))
+ return matrix_mdev;
+ }
+
+ return NULL;
+}
+
/**
* vfio_ap_wait_for_irqclear
* @apqn: The AP Queue number
@@ -287,13 +302,13 @@ static int handle_pqap(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.sie_block->eca & ECA_AIV))
return -EOPNOTSUPP;

+
apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
mutex_lock(&matrix_dev->lock);

- if (!vcpu->kvm->arch.crypto.pqap_hook)
+ matrix_mdev = vfio_ap_find_mdev_for_apqn(apqn);
+ if (!matrix_mdev)
goto out_unlock;
- matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
- struct ap_matrix_mdev, pqap_hook);

/*
* If the KVM pointer is in the process of being set, wait until the
@@ -353,8 +368,6 @@ static int vfio_ap_mdev_create(struct mdev_device *mdev)
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
init_waitqueue_head(&matrix_mdev->wait_for_kvm);
mdev_set_drvdata(mdev, matrix_mdev);
- matrix_mdev->pqap_hook.hook = handle_pqap;
- matrix_mdev->pqap_hook.owner = THIS_MODULE;
mutex_lock(&matrix_dev->lock);
list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
mutex_unlock(&matrix_dev->lock);
@@ -1123,7 +1136,6 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
matrix_mdev->matrix.aqm,
matrix_mdev->matrix.adm);
mutex_lock(&matrix_dev->lock);
- kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
matrix_mdev->kvm = kvm;
matrix_mdev->kvm_busy = false;
wake_up_all(&matrix_mdev->wait_for_kvm);
@@ -1193,7 +1205,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
mutex_lock(&matrix_dev->lock);
vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
- matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
kvm_put_kvm(matrix_mdev->kvm);
matrix_mdev->kvm = NULL;
matrix_mdev->kvm_busy = false;
@@ -1433,14 +1444,26 @@ static const struct mdev_parent_ops vfio_ap_matrix_ops = {
.ioctl = vfio_ap_mdev_ioctl,
};

+static struct kvm_s390_crypto_hook pqap_hook = {
+ .type = PQAP_HOOK,
+ .owner = THIS_MODULE,
+ .hook_fcn = handle_pqap
+};
+
int vfio_ap_mdev_register(void)
{
+ int ret;
atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);

+ ret = kvm_arch_crypto_register_hook(&pqap_hook);
+ if (ret)
+ return ret;
+
return mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_ops);
}

void vfio_ap_mdev_unregister(void)
{
+ WARN_ON(kvm_arch_crypto_unregister_hook(&pqap_hook));
mdev_unregister_device(&matrix_dev->device);
}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index f82a6396acae..542259b57972 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -86,7 +86,6 @@ struct ap_matrix_mdev {
bool kvm_busy;
wait_queue_head_t wait_for_kvm;
struct kvm *kvm;
- struct kvm_s390_module_hook pqap_hook;
struct mdev_device *mdev;
};

--
2.30.2


2021-05-23 22:59:16

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On Fri, May 21, 2021 at 03:36:48PM -0400, Tony Krowiak wrote:
> +static struct kvm_s390_crypto_hook
> +*kvm_arch_crypto_find_hook(enum kvm_s390_crypto_hook_type type)
> +{
> + struct kvm_s390_crypto_hook *crypto_hook;
> +
> + list_for_each_entry(crypto_hook, &crypto_hooks, node) {
> + if (crypto_hook->type == type)
> + return crypto_hook;
> + }
> +
> + return NULL;
> +}
> +
> +int kvm_arch_crypto_register_hook(struct kvm_s390_crypto_hook *hook)
> +{
> + struct kvm_s390_crypto_hook *crypto_hook;
> +
> + mutex_lock(&crypto_hooks_lock);
> + crypto_hook = kvm_arch_crypto_find_hook(hook->type);
> + if (crypto_hook) {
> + if (crypto_hook->owner != hook->owner)
> + return -EACCES;
> + list_replace(&crypto_hook->node, &hook->node);

This is all dead code right? This is only called from a module init
function so it can't be called twice. Just always fail if the hook is
already used and delete the owner stuff.

But this is alot of complicated and unused code to solve a lock
ordering problem..

Jason

2021-05-24 14:39:05

by Jason J. Herne

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On 5/21/21 3:36 PM, Tony Krowiak wrote:
> The function pointer to the handler that processes interception of the
> PQAP instruction is contained in the mdev. If the mdev is removed and
> its storage de-allocated during the processing of the PQAP instruction,
> the function pointer could get wiped out before the function is called
> because there is currently nothing that controls access to it.
>
> This patch introduces two new functions:
> * The kvm_arch_crypto_register_hook() function registers a function pointer
> for processing intercepted crypto instructions.
> * The kvm_arch_crypto_register_hook() function un-registers a function
> pointer that was previously registered.

Typo: You meant kvm_arch_crypto_UNregister_hook() in the second bullet.


Just one overall observation on this one. The whole hook system seems kind of
over-engineered if this is our only use for it. It looks like a kvm_s390_crypto_hook is
meant to link a specific module with a function pointer. Do we really need this concept?

I think a simpler design could be to just place a mutex and a function pointer in the
kvm_s390_crypto struct. Then you can grab the mutex in vfio_ap_ops.c when
registering/unregistering. You would also grab the mutex in priv.c when calling the
function pointer. What I am suggesting is essentially the exact same scheme you have
implemented here, but simpler and with less infrastructure.

With that said, I'll point out that I am relative new to this code (and this patch series)
so maybe I've missed something and the extra complexity is needed for some reason. But if
it is not, I'm all in favor of keeping things simple.

--
-- Jason J. Herne ([email protected])

2021-05-25 13:18:23

by Anthony Krowiak

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler



On 5/24/21 10:37 AM, Jason J. Herne wrote:
> On 5/21/21 3:36 PM, Tony Krowiak wrote:
>> The function pointer to the handler that processes interception of the
>> PQAP instruction is contained in the mdev. If the mdev is removed and
>> its storage de-allocated during the processing of the PQAP instruction,
>> the function pointer could get wiped out before the function is called
>> because there is currently nothing that controls access to it.
>>
>> This patch introduces two new functions:
>> * The kvm_arch_crypto_register_hook() function registers a function
>> pointer
>>    for processing intercepted crypto instructions.
>> * The kvm_arch_crypto_register_hook() function un-registers a function
>>    pointer that was previously registered.
>
> Typo: You meant kvm_arch_crypto_UNregister_hook() in the second bullet.
>
>
> Just one overall observation on this one. The whole hook system seems
> kind of over-engineered if this is our only use for it. It looks like
> a kvm_s390_crypto_hook is meant to link a specific module with a
> function pointer. Do we really need this concept?
>
> I think a simpler design could be to just place a mutex and a function
> pointer in the kvm_s390_crypto struct. Then you can grab the mutex in
> vfio_ap_ops.c when registering/unregistering. You would also grab the
> mutex in priv.c when calling the function pointer. What I am
> suggesting is essentially the exact same scheme you have implemented
> here, but simpler and with less infrastructure.

That would be great, however; when I implemented something similar, it
resulted in a
lockdep splat between the lock used to protect the hook and the
matrix_dev->lock used to
protect updates to matrix_mdev (including the freeing thereof). After
pulling what little hair
I have left out, this seemed like a reasonable solution, over-engineered
though it may be.
If somebody has a simpler solution, I'm all ears.

>
> With that said, I'll point out that I am relative new to this code
> (and this patch series) so maybe I've missed something and the extra
> complexity is needed for some reason. But if it is not, I'm all in
> favor of keeping things simple.
>

2021-05-25 13:21:28

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On Tue, May 25, 2021 at 09:16:30AM -0400, Tony Krowiak wrote:
>
>
> On 5/24/21 10:37 AM, Jason J. Herne wrote:
> > On 5/21/21 3:36 PM, Tony Krowiak wrote:
> > > The function pointer to the handler that processes interception of the
> > > PQAP instruction is contained in the mdev. If the mdev is removed and
> > > its storage de-allocated during the processing of the PQAP instruction,
> > > the function pointer could get wiped out before the function is called
> > > because there is currently nothing that controls access to it.
> > >
> > > This patch introduces two new functions:
> > > * The kvm_arch_crypto_register_hook() function registers a function
> > > pointer
> > >    for processing intercepted crypto instructions.
> > > * The kvm_arch_crypto_register_hook() function un-registers a function
> > >    pointer that was previously registered.
> >
> > Typo: You meant kvm_arch_crypto_UNregister_hook() in the second bullet.
> >
> >
> > Just one overall observation on this one. The whole hook system seems
> > kind of over-engineered if this is our only use for it. It looks like a
> > kvm_s390_crypto_hook is meant to link a specific module with a function
> > pointer. Do we really need this concept?
> >
> > I think a simpler design could be to just place a mutex and a function
> > pointer in the kvm_s390_crypto struct. Then you can grab the mutex in
> > vfio_ap_ops.c when registering/unregistering. You would also grab the
> > mutex in priv.c when calling the function pointer. What I am suggesting
> > is essentially the exact same scheme you have implemented here, but
> > simpler and with less infrastructure.
>
> That would be great, however; when I implemented something similar, it
> resulted in a
> lockdep splat between the lock used to protect the hook and the
> matrix_dev->lock used to
> protect updates to matrix_mdev (including the freeing thereof). After
> pulling what little hair
> I have left out, this seemed like a reasonable solution, over-engineered
> though it may be.
> If somebody has a simpler solution, I'm all ears.

Why can't you put the locks in the right order? It looked trivial, I'm confused.

Jason

2021-05-25 13:30:41

by Jason J. Herne

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On 5/24/21 10:37 AM, Jason J. Herne wrote:
> On 5/21/21 3:36 PM, Tony Krowiak wrote:
>> The function pointer to the handler that processes interception of the
>> PQAP instruction is contained in the mdev. If the mdev is removed and
>> its storage de-allocated during the processing of the PQAP instruction,
>> the function pointer could get wiped out before the function is called
>> because there is currently nothing that controls access to it.
>>
>> This patch introduces two new functions:
>> * The kvm_arch_crypto_register_hook() function registers a function pointer
>>    for processing intercepted crypto instructions.
>> * The kvm_arch_crypto_register_hook() function un-registers a function
>>    pointer that was previously registered.
>
> Typo: You meant kvm_arch_crypto_UNregister_hook() in the second bullet.
>
>
> Just one overall observation on this one. The whole hook system seems kind of
> over-engineered if this is our only use for it. It looks like a kvm_s390_crypto_hook is
> meant to link a specific module with a function pointer. Do we really need this concept?
>
> I think a simpler design could be to just place a mutex and a function pointer in the
> kvm_s390_crypto struct. Then you can grab the mutex in vfio_ap_ops.c when
> registering/unregistering. You would also grab the mutex in priv.c when calling the
> function pointer. What I am suggesting is essentially the exact same scheme you have
> implemented here, but simpler and with less infrastructure.
>
> With that said, I'll point out that I am relative new to this code (and this patch series)
> so maybe I've missed something and the extra complexity is needed for some reason. But if
> it is not, I'm all in favor of keeping things simple.
>

After thinking about this problem a bit more, I'm wondering if we can remove the lock
entirely. How about we store a function pointer in kvm_s390_crypto? Initially that
function pointer will point to a stub function that handles the error case, exactly like
it is done in priv.c:handle_pqap() today when the function pointer would be NULL. When the
ap module loads, we can simply change the function pointer to point to
vfio_ap_ops:handle_pqap(). When we unload the module we change the function pointer back
to the stub. The updates should be atomic operations so no lock needed, right?

--
-- Jason J. Herne ([email protected])

2021-05-25 13:31:46

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On Tue, May 25, 2021 at 09:24:59AM -0400, Jason J. Herne wrote:
> change the function pointer to point to vfio_ap_ops:handle_pqap(). When we
> unload the module we change the function pointer back to the stub. The
> updates should be atomic operations so no lock needed, right?

No

Jason

2021-05-25 14:12:33

by Jason J. Herne

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On 5/25/21 9:26 AM, Jason Gunthorpe wrote:
> On Tue, May 25, 2021 at 09:24:59AM -0400, Jason J. Herne wrote:
>> change the function pointer to point to vfio_ap_ops:handle_pqap(). When we
>> unload the module we change the function pointer back to the stub. The
>> updates should be atomic operations so no lock needed, right?
>
> No
>
> Jason
>

Okay... Would you be willing to elaborate, please? A counter argument, or a simple
explanation would be appreciated. A simple "no" does not really do much to advance the
discussion :).

I'm fairly sure that a 64-bit pointer would be updated atomically. A reader of this value
is either going to see value A or value B, not the high half of A and the low half of B.
Maybe we also need a memory barrier to prevent stale values from being seen on another core?

--
-- Jason J. Herne ([email protected])

2021-05-25 14:19:13

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On Tue, May 25, 2021 at 10:07:46AM -0400, Jason J. Herne wrote:
> On 5/25/21 9:26 AM, Jason Gunthorpe wrote:
> > On Tue, May 25, 2021 at 09:24:59AM -0400, Jason J. Herne wrote:
> > > change the function pointer to point to vfio_ap_ops:handle_pqap(). When we
> > > unload the module we change the function pointer back to the stub. The
> > > updates should be atomic operations so no lock needed, right?
> >
> > No
> >
> > Jason
> >
>
> Okay... Would you be willing to elaborate, please? A counter argument, or a
> simple explanation would be appreciated. A simple "no" does not really do
> much to advance the discussion :).

Go back and review the earlier thread, the issue was never the
atomicity of the function pointer but the locking of the data that
function is accessing.

> I'm fairly sure that a 64-bit pointer would be updated atomically. A reader
> of this value is either going to see value A or value B, not the high half
> of A and the low half of B. Maybe we also need a memory barrier to prevent
> stale values from being seen on another core?

You need to use special macros in Linux to follow this memory model

Jason

2021-05-25 17:17:18

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On Tue, May 25, 2021 at 10:59:25AM -0400, Tony Krowiak wrote:
> > But this is alot of complicated and unused code to solve a lock
> > ordering problem..
>
> If you have a better solution, I'm all ears. I've been down this
> road a couple of times now and solving lock ordering for
> multiple asynchronous processes is not trivial. This seems like
> a reasonable solution and provides for flexibility for including
> additional hooks to handle interception of other AP instructions.

Lock ordering is very trivial. In this case you have to always hold
the hook lock before obtaining the matrix_dev lock. From what I
remember there was only one error on the set path where they were
ordered wrong

Jason

2021-05-25 17:26:13

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On Tue, May 25, 2021 at 11:08:22AM -0400, Tony Krowiak wrote:

> > Why can't you put the locks in the right order? It looked trivial, I'm confused.
>
> Because the handle_pqap() function in priv.c does not have access to the
> matrix_dev lock.

Based on the sketch made the handle_pqap() should only handle the
arch.crypto.rwsem.

When it calls the hook it gets the matrix dev

This sets the lock order as always: rwsem then matrix_dev

Of the other two places:

@@ -352,8 +352,7 @@ static int vfio_ap_mdev_create(struct mdev_device *mdev)
+ down_write(&&vcpu->kvm->arch.crypto.rwsem);
mutex_lock(&matrix_dev->lock);

Obviously correct

@@ -1202,7 +1203,9 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
mutex_lock(&matrix_dev->lock);
vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
+ down_write(&matrix_mdev->kvm->arch.crypto.rwsem);
matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
+ up_write(&matrix_mdev->kvm->arch.crypto.rwsem);

This is inverted

Just move the down_write up two lines

What is missing?

Jason

2021-05-25 18:01:29

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On Tue, May 25, 2021 at 11:56:50AM -0400, Tony Krowiak wrote:

> The vfio_ap_mdev_unset_kvm() function, however, is called both by
> the group notifier when the KVM pointer has been cleared or when the
> mdev is being removed. In both cases, the only way to get the KVM
> pointer - which is needed to unplug the AP resources from the guest
> - is from the matrix_mdev which contains it.

Okay, but that isn't a problem, the matrix dev holds a ref on the kvm
pointer so we can just copy it outside the lock after we prevent it
from changing by unregistering the notifier:

@@ -1362,14 +1365,19 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);

- mutex_lock(&matrix_dev->lock);
- vfio_ap_mdev_unset_kvm(matrix_mdev);
- mutex_unlock(&matrix_dev->lock);
-
vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
&matrix_mdev->iommu_notifier);
vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
&matrix_mdev->group_notifier);
+
+ mutex_lock(&matrix_dev->lock);
+ /* matrix_dev->kvm cannot be changed now since we removed the notifiers */
+ kvm = matrix_mdev->kvm;
+ matrix_mdev->kvm = NULL;
+ mutex_unlock(&matrix_dev->lock);
+
+ vfio_ap_mdev_unset_kvm(matrix_mdev, kvm);
+
module_put(THIS_MODULE);

Note the above misordering is an existing bug too

And reoganize unset_kvm so it uses internal locking and gets the kvm
from the argument.

Also the kvm_busy should be replaced by a proper rwsem, don't try to
open code locks like that - it just defeats lockdep analysis.

Finally, since the only way the ->kvm can be become non-NULL is if the
notifier is registered, release above removes the notifier, and remove
can't be called unless release has been completed, it looks to me like
this the remove check is just dead code, delete it, or leave it as a
WARN_ON:

@@ -366,16 +366,6 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);

mutex_lock(&matrix_dev->lock);
-
- /*
- * If the KVM pointer is in flux or the guest is running, disallow
- * un-assignment of control domain.
- */
- if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
- mutex_unlock(&matrix_dev->lock);
- return -EBUSY;
- }

Jason

2021-05-25 19:26:57

by Anthony Krowiak

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler



On 5/23/21 6:57 PM, Jason Gunthorpe wrote:
> On Fri, May 21, 2021 at 03:36:48PM -0400, Tony Krowiak wrote:
>> +static struct kvm_s390_crypto_hook
>> +*kvm_arch_crypto_find_hook(enum kvm_s390_crypto_hook_type type)
>> +{
>> + struct kvm_s390_crypto_hook *crypto_hook;
>> +
>> + list_for_each_entry(crypto_hook, &crypto_hooks, node) {
>> + if (crypto_hook->type == type)
>> + return crypto_hook;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +int kvm_arch_crypto_register_hook(struct kvm_s390_crypto_hook *hook)
>> +{
>> + struct kvm_s390_crypto_hook *crypto_hook;
>> +
>> + mutex_lock(&crypto_hooks_lock);
>> + crypto_hook = kvm_arch_crypto_find_hook(hook->type);
>> + if (crypto_hook) {
>> + if (crypto_hook->owner != hook->owner)
>> + return -EACCES;
>> + list_replace(&crypto_hook->node, &hook->node);
> This is all dead code right? This is only called from a module init
> function so it can't be called twice.

That is true only if you are considering the current case.
Is it guaranteed that only the vfio_ap module
will call this function and is there a guarantee that it will
always and only be called from the vfio_ap module init
function? For example, suppose a hook is added to
intercept the AP NQAP or DQAP instruction? We don't
know how or when those handlers will be registered
or unregistered. We don't even know if the handlers
will be part of the vfio_ap module or not.

> Just always fail if the hook is
> already used and delete the owner stuff.

I suppose that is reasonable and simpler.

>
> But this is alot of complicated and unused code to solve a lock
> ordering problem..

If you have a better solution, I'm all ears. I've been down this
road a couple of times now and solving lock ordering for
multiple asynchronous processes is not trivial. This seems like
a reasonable solution and provides for flexibility for including
additional hooks to handle interception of other AP instructions.

>
> Jason

2021-05-25 20:33:50

by Anthony Krowiak

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler



On 5/25/21 9:19 AM, Jason Gunthorpe wrote:
> On Tue, May 25, 2021 at 09:16:30AM -0400, Tony Krowiak wrote:
>>
>> On 5/24/21 10:37 AM, Jason J. Herne wrote:
>>> On 5/21/21 3:36 PM, Tony Krowiak wrote:
>>>> The function pointer to the handler that processes interception of the
>>>> PQAP instruction is contained in the mdev. If the mdev is removed and
>>>> its storage de-allocated during the processing of the PQAP instruction,
>>>> the function pointer could get wiped out before the function is called
>>>> because there is currently nothing that controls access to it.
>>>>
>>>> This patch introduces two new functions:
>>>> * The kvm_arch_crypto_register_hook() function registers a function
>>>> pointer
>>>>    for processing intercepted crypto instructions.
>>>> * The kvm_arch_crypto_register_hook() function un-registers a function
>>>>    pointer that was previously registered.
>>> Typo: You meant kvm_arch_crypto_UNregister_hook() in the second bullet.
>>>
>>>
>>> Just one overall observation on this one. The whole hook system seems
>>> kind of over-engineered if this is our only use for it. It looks like a
>>> kvm_s390_crypto_hook is meant to link a specific module with a function
>>> pointer. Do we really need this concept?
>>>
>>> I think a simpler design could be to just place a mutex and a function
>>> pointer in the kvm_s390_crypto struct. Then you can grab the mutex in
>>> vfio_ap_ops.c when registering/unregistering. You would also grab the
>>> mutex in priv.c when calling the function pointer. What I am suggesting
>>> is essentially the exact same scheme you have implemented here, but
>>> simpler and with less infrastructure.
>> That would be great, however; when I implemented something similar, it
>> resulted in a
>> lockdep splat between the lock used to protect the hook and the
>> matrix_dev->lock used to
>> protect updates to matrix_mdev (including the freeing thereof). After
>> pulling what little hair
>> I have left out, this seemed like a reasonable solution, over-engineered
>> though it may be.
>> If somebody has a simpler solution, I'm all ears.
> Why can't you put the locks in the right order? It looked trivial, I'm confused.

Because the handle_pqap() function in priv.c does not have access to the
matrix_dev lock.

>
> Jason

2021-05-25 20:53:28

by Anthony Krowiak

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler


> Why can't you put the locks in the right order? It looked trivial, I'm confused.
>
> Jason

I explained this in one of my responses to the previous series. Maybe I
didn't do
a good job of it, so let me see if I can provide a more thorough
explanation.

The handle_pqap() function in priv.c does not have any access to the
matrix_dev->lock which lives in the vfio_ap module, so there is no
way for this function to control the order of locking. The only lock
it can access is the lock provided for the hook function pointer which
must be held for the duration of the execution of the hook. When the
hook function (handle_pqap() in vfio_ap_ops.c) is called, it has to lock
the matrix_dev->lock mutex to do its thing. The interception of the
PQAP instruction that sets off the above scenario can happen simultaneously
with both the vfio_ap_mdev_set_kvm() and vfio_ap_mdev_unset_kvm()
instructions in vfio_ap_ops.c.

The vfio_ap_mdev_set_kvm() function is called only by the group notifier
callback when the vfio_ap driver is notified that the KVM pointer has
been set.
In this case, we could set the lock for the hook function before setting
the matrix_dev->lock and calling the vfio_ap_mdev_set_kvm() function and
all would be well.

The vfio_ap_mdev_unset_kvm() function, however, is called both by the group
notifier when the KVM pointer has been cleared or when the mdev is
being removed. In both cases, the only way to get the KVM pointer - which
is needed to unplug the AP resources from the guest - is from the
matrix_mdev
which contains it. This, of course, needs to be done while holding the
matrix_dev->lock mutex. The vfio_ap_mdev_unset_kvm() function also
clears the hook function pointer, but can only get the lock used to
control access
to it from the matrix_mdev; therein lies the rub. So we can have the
following
scenario which is flagged by lockdep:

CPU x:                                            CPU y:
--------                                             --------
                                                      lock the
matrix_dev->lock:
vfio_ap_mdev_set_kvm in vfio_ap_ops.c

lock the hook pointer:
handle_pqap in priv.c

lock the matrix_dev->lock
handle_pqap in vfio_ap_ops.c

                                                      lock the hook
pointer:
vfio_ap_mdev_set_kvm in vfio_ap_ops.c


Maybe I'm missing something, but I was unable to find a way around this when
the hook function pointer and its locking mechanism is stored in a field
of a satellite
structure of struct kvm.



2021-05-27 02:34:05

by Anthony Krowiak

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler



On 5/25/21 12:29 PM, Jason Gunthorpe wrote:
> On Tue, May 25, 2021 at 11:56:50AM -0400, Tony Krowiak wrote:
>
>> The vfio_ap_mdev_unset_kvm() function, however, is called both by
>> the group notifier when the KVM pointer has been cleared or when the
>> mdev is being removed. In both cases, the only way to get the KVM
>> pointer - which is needed to unplug the AP resources from the guest
>> - is from the matrix_mdev which contains it.
> Okay, but that isn't a problem, the matrix dev holds a ref on the kvm
> pointer so we can just copy it outside the lock after we prevent it
> from changing by unregistering the notifier:
>
> @@ -1362,14 +1365,19 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
> {
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>
> - mutex_lock(&matrix_dev->lock);
> - vfio_ap_mdev_unset_kvm(matrix_mdev);
> - mutex_unlock(&matrix_dev->lock);
> -
> vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> &matrix_mdev->iommu_notifier);
> vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
> &matrix_mdev->group_notifier);
> +
> + mutex_lock(&matrix_dev->lock);
> + /* matrix_dev->kvm cannot be changed now since we removed the notifiers */
> + kvm = matrix_mdev->kvm;
> + matrix_mdev->kvm = NULL;
> + mutex_unlock(&matrix_dev->lock);
> +
> + vfio_ap_mdev_unset_kvm(matrix_mdev, kvm);
> +
> module_put(THIS_MODULE);
>
> Note the above misordering is an existing bug too
>
> And reoganize unset_kvm so it uses internal locking and gets the kvm
> from the argument.

As I told you in a previous email, this is not a trivial exercise.
If you take a look at the vfio_ap_mdev_unset_kvm()
function, you will notice that it invokes the vfio_ap_mdev_reset_queues()
function to reset the queues after taking them away from the
guest. As each queue is reset, the resources required for
processing interrupts for the queue are freed in the
vfio_ap_free_aqic_resources() function. In order to unregister the
the guest's ISC, the matrix_mdev->kvm pointer must still
be set, however, you cleared it above.

Another thing you're overlooking is the fact that all of the
assignment/unassignment functions associated with the
corresponding syfs attributes of the mdev change the
content of the matrix_mdev->matrix and
matrix_mdev->shadow_apcb structures. In particular,
the matrix_mdev->matrix contains the APQNs of the
queues that must be reset. These sysfs attributes can
be accessed at any time including when the
vfio_ap_mdev_unset_kvm() function is executing,
so that is something that must also be taken into
consideration.


>
> Also the kvm_busy should be replaced by a proper rwsem, don't try to
> open code locks like that - it just defeats lockdep analysis.

I've had no luck trying to refactor this using rwsem. I always
run into lockdep problems between the matrix_dev->lock
and matrix_mdev->rwsem, even if the locking order is maintained.
Clearly, I am lacking in understanding of how these locks
interact. Any clues here?

> Finally, since the only way the ->kvm can be become non-NULL is if the
> notifier is registered, release above removes the notifier, and remove
> can't be called unless release has been completed, it looks to me like
> this the remove check is just dead code, delete it, or leave it as a
> WARN_ON:
>
> @@ -366,16 +366,6 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
> struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>
> mutex_lock(&matrix_dev->lock);
> -
> - /*
> - * If the KVM pointer is in flux or the guest is running, disallow
> - * un-assignment of control domain.
> - */
> - if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
> - mutex_unlock(&matrix_dev->lock);
> - return -EBUSY;
> - }
>
> Jason

2021-05-27 15:47:54

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] s390/vfio-ap: control access to PQAP(AQIC) interception handler

On Wed, May 26, 2021 at 10:28:29PM -0400, Tony Krowiak wrote:
>
>
> On 5/25/21 12:29 PM, Jason Gunthorpe wrote:
> > On Tue, May 25, 2021 at 11:56:50AM -0400, Tony Krowiak wrote:
> >
> > > The vfio_ap_mdev_unset_kvm() function, however, is called both by
> > > the group notifier when the KVM pointer has been cleared or when the
> > > mdev is being removed. In both cases, the only way to get the KVM
> > > pointer - which is needed to unplug the AP resources from the guest
> > > - is from the matrix_mdev which contains it.
> > Okay, but that isn't a problem, the matrix dev holds a ref on the kvm
> > pointer so we can just copy it outside the lock after we prevent it
> > from changing by unregistering the notifier:
> >
> > @@ -1362,14 +1365,19 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
> > {
> > struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> > - mutex_lock(&matrix_dev->lock);
> > - vfio_ap_mdev_unset_kvm(matrix_mdev);
> > - mutex_unlock(&matrix_dev->lock);
> > -
> > vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
> > &matrix_mdev->iommu_notifier);
> > vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
> > &matrix_mdev->group_notifier);
> > +
> > + mutex_lock(&matrix_dev->lock);
> > + /* matrix_dev->kvm cannot be changed now since we removed the notifiers */
> > + kvm = matrix_mdev->kvm;
> > + matrix_mdev->kvm = NULL;
> > + mutex_unlock(&matrix_dev->lock);
> > +
> > + vfio_ap_mdev_unset_kvm(matrix_mdev, kvm);
> > +
> > module_put(THIS_MODULE);
> >
> > Note the above misordering is an existing bug too
> >
> > And reoganize unset_kvm so it uses internal locking and gets the kvm
> > from the argument.
>
> As I told you in a previous email, this is not a trivial exercise.

Well, it is not a 5 line patch, but it looks like 10 mins work and
some testing to me, tracking down all the uses of matrx_mdev->kvm
under the vfio_ap_mdev_unset_kvm() call does not seem difficult nor do
there seem to be so many.

> vfio_ap_free_aqic_resources() function. In order to unregister the
> the guest's ISC, the matrix_mdev->kvm pointer must still
> be set, however, you cleared it above.

Which is why I said unset_kvm needs to be reorganized to use the kvm
argument, not the matrixt_mdev->kvm

> Another thing you're overlooking is the fact that all of the
> assignment/unassignment functions associated with the
> corresponding syfs attributes of the mdev change the
> content of the matrix_mdev->matrix and
> matrix_mdev->shadow_apcb structures. In particular,
> the matrix_mdev->matrix contains the APQNs of the
> queues that must be reset. These sysfs attributes can
> be accessed at any time including when the
> vfio_ap_mdev_unset_kvm() function is executing,
> so that is something that must also be taken into
> consideration.

I checked and thought they already had a lock?

> > Also the kvm_busy should be replaced by a proper rwsem, don't try to
> > open code locks like that - it just defeats lockdep analysis.
>
> I've had no luck trying to refactor this using rwsem. I always
> run into lockdep problems between the matrix_dev->lock
> and matrix_mdev->rwsem, even if the locking order is maintained.

Usually when people start open coding locks it is often because
lockdep complained.

Open coding a lock makes lockdep stop because the lockdep code is
removed, but it doesn't fix anything.

> Clearly, I am lacking in understanding of how these locks
> interact. Any clues here?

I'd have to see the lockdep reports and look at it quite a lot
more.

Jason