In vmci_host.c, a missing memory barrier between vmci_host_dev->ct_type
and vmci_host_dev->context may cause uninitialized data access.
One possible execution flow is as follows:
CPU 1 (vmci_host_do_init_context)
=====
vmci_host_dev->context = vmci_ctx_create(...) // 1
vmci_host_dev->ct_type = VMCIOBJ_CONTEXT; // 2
CPU 2 (vmci_host_poll)
=====
if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) { // 3
context = vmci_host_dev->context; // 4
poll_wait(..., &context->host_context.wait_queue, ...);
While ct_type serves as a flag indicating that context is initialized,
there is no memory barrier preventing reordering between 1, 2 and 3, 4.
It is therefore possible for 4 to read an uninitialized
vmci_host_dev->context, in which case a NULL pointer dereference
occurs in poll_wait().
To prevent this kind of reordering, change the plain accesses to
ct_type into smp_load_acquire() and smp_store_release().
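The resulting pairing, in sketch form (this only restates what the diff
below does):

  /* writer: vmci_host_do_init_context() */
  vmci_host_dev->context = vmci_ctx_create(...);
  /* publish the fully initialized context */
  smp_store_release(&vmci_host_dev->ct_type, VMCIOBJ_CONTEXT);

  /* readers: vmci_host_poll() and the ioctl handlers */
  if (smp_load_acquire(&vmci_host_dev->ct_type) == VMCIOBJ_CONTEXT) {
          /* guaranteed to observe the store to ->context above */
          context = vmci_host_dev->context;
  }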
Signed-off-by: Yewon Choi <[email protected]>
---
drivers/misc/vmw_vmci/vmci_host.c | 40 ++++++++++++++++++-------------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index abe79f6fd2a7..e83b6e0fe55b 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -139,7 +139,7 @@ static int vmci_host_close(struct inode *inode, struct file *filp)
{
struct vmci_host_dev *vmci_host_dev = filp->private_data;
- if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) == VMCIOBJ_CONTEXT) {
vmci_ctx_destroy(vmci_host_dev->context);
vmci_host_dev->context = NULL;
@@ -168,7 +168,7 @@ static __poll_t vmci_host_poll(struct file *filp, poll_table *wait)
struct vmci_ctx *context;
__poll_t mask = 0;
- if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) == VMCIOBJ_CONTEXT) {
/*
* Read context only if ct_type == VMCIOBJ_CONTEXT to make
* sure that context is initialized
@@ -309,7 +309,7 @@ static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev,
mutex_lock(&vmci_host_dev->lock);
- if (vmci_host_dev->ct_type != VMCIOBJ_NOT_SET) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_NOT_SET) {
vmci_ioctl_err("received VMCI init on initialized handle\n");
retval = -EINVAL;
goto out;
@@ -346,7 +346,13 @@ static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev,
goto out;
}
- vmci_host_dev->ct_type = VMCIOBJ_CONTEXT;
+ /*
+ * Make sure that ct_type is written after
+ * vmci_host_dev->context is initialized.
+ *
+ * This pairs with smp_load_acquire() in vmci_host_XXX.
+ */
+ smp_store_release(&vmci_host_dev->ct_type, VMCIOBJ_CONTEXT);
atomic_inc(&vmci_host_active_users);
vmci_call_vsock_callback(true);
@@ -366,7 +372,7 @@ static int vmci_host_do_send_datagram(struct vmci_host_dev *vmci_host_dev,
struct vmci_datagram *dg = NULL;
u32 cid;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -422,7 +428,7 @@ static int vmci_host_do_receive_datagram(struct vmci_host_dev *vmci_host_dev,
int retval;
size_t size;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -453,7 +459,7 @@ static int vmci_host_do_alloc_queuepair(struct vmci_host_dev *vmci_host_dev,
int vmci_status;
int __user *retptr;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -522,7 +528,7 @@ static int vmci_host_do_queuepair_setva(struct vmci_host_dev *vmci_host_dev,
struct vmci_qp_set_va_info __user *info = uptr;
s32 result;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -570,7 +576,7 @@ static int vmci_host_do_queuepair_setpf(struct vmci_host_dev *vmci_host_dev,
return -EINVAL;
}
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -641,7 +647,7 @@ static int vmci_host_do_qp_detach(struct vmci_host_dev *vmci_host_dev,
struct vmci_qp_dtch_info __user *info = uptr;
s32 result;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -668,7 +674,7 @@ static int vmci_host_do_ctx_add_notify(struct vmci_host_dev *vmci_host_dev,
s32 result;
u32 cid;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -691,7 +697,7 @@ static int vmci_host_do_ctx_remove_notify(struct vmci_host_dev *vmci_host_dev,
u32 cid;
int result;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -715,7 +721,7 @@ static int vmci_host_do_ctx_get_cpt_state(struct vmci_host_dev *vmci_host_dev,
void *cpt_buf;
int retval;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -747,7 +753,7 @@ static int vmci_host_do_ctx_set_cpt_state(struct vmci_host_dev *vmci_host_dev,
void *cpt_buf;
int retval;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -785,7 +791,7 @@ static int vmci_host_do_set_notify(struct vmci_host_dev *vmci_host_dev,
{
struct vmci_set_notify_info notify_info;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -818,7 +824,7 @@ static int vmci_host_do_notify_resource(struct vmci_host_dev *vmci_host_dev,
return -EINVAL;
}
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
@@ -867,7 +873,7 @@ static int vmci_host_do_recv_notifications(struct vmci_host_dev *vmci_host_dev,
u32 cid;
int retval = 0;
- if (vmci_host_dev->ct_type != VMCIOBJ_CONTEXT) {
+ if (smp_load_acquire(&vmci_host_dev->ct_type) != VMCIOBJ_CONTEXT) {
vmci_ioctl_err("only valid for contexts\n");
return -EINVAL;
}
--
2.37.3
On Wed, Nov 22, 2023 at 09:20:08PM +0900, Yewon Choi wrote:
> @@ -139,7 +139,7 @@ static int vmci_host_close(struct inode *inode, struct file *filp)
> {
> struct vmci_host_dev *vmci_host_dev = filp->private_data;
>
> - if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
> + if (smp_load_acquire(&vmci_host_dev->ct_type) == VMCIOBJ_CONTEXT) {
This is getting tricky, why not use a normal lock to ensure that all is
safe? close isn't on a "fast path", so this shouldn't be a speed issue,
right?
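Something along these lines, for example (untested sketch only, assuming
the existing vmci_host_dev->lock mutex can also be taken in the release
path):

  static int vmci_host_close(struct inode *inode, struct file *filp)
  {
          struct vmci_host_dev *vmci_host_dev = filp->private_data;

          mutex_lock(&vmci_host_dev->lock);
          if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
                  vmci_ctx_destroy(vmci_host_dev->context);
                  vmci_host_dev->context = NULL;
          }
          mutex_unlock(&vmci_host_dev->lock);

          /* ... rest of the teardown unchanged ... */
          return 0;
  }

The ioctl and poll paths that read ->context would then have to take the
same lock as well, of course, otherwise it buys nothing.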
thanks,
greg k-h
On Wed, Nov 22, 2023 at 02:34:55PM +0000, Greg Kroah-Hartman wrote:
> On Wed, Nov 22, 2023 at 09:20:08PM +0900, Yewon Choi wrote:
> > @@ -139,7 +139,7 @@ static int vmci_host_close(struct inode *inode, struct file *filp)
> > {
> > struct vmci_host_dev *vmci_host_dev = filp->private_data;
> >
> > - if (vmci_host_dev->ct_type == VMCIOBJ_CONTEXT) {
> > + if (smp_load_acquire(&vmci_host_dev->ct_type) == VMCIOBJ_CONTEXT) {
>
> This is getting tricky, why not use a normal lock to ensure that all is
> safe? close isn't on a "fast path", so this shouldn't be a speed issue,
> right?
>
I think using locks can be considered orthogonal to correcting memory ordering.
As you pointed out, vmci_host_close is not a performance-critical function
while other functions using vmci_host_dev->context are performance-critical.
If the lock is needed, we will need to add locks in all of them. I cannot be
sure which is better. Besides that, it seems to be a separate issue.
On the other hand, the current implementation doesn't guarantee memory ordering
which leads to wrong behavior.
This patch fixes this issue by adding primitives.
Thank you for your reply.
Regards,
Yewon Choi
On Thu, Nov 23, 2023 at 04:49:22PM +0900, Yewon Choi wrote:
> I think using locks can be considered orthogonal to correcting memory ordering.
But they ensure proper memory ordering.
> As you pointed out, vmci_host_close is not a performance-critical function
> while other functions using vmci_host_dev->context are performance-critical.
In what way? Why is the context being constantly checked in such
situations? And if so, it can change right after being checked so a
real lock needs to be used.
> If the lock is needed, we will need to add locks in all of them. I cannot be
> sure which is better. Besides that, it seems to be a separate issue.
Nope, I think it's the same issue :)
> On the other hand, the current implementation doesn't guarantee memory ordering
> which leads to wrong behavior.
> This patch fixes this issue by adding primitives.
But it's still wrong, again, what keeps the value from changing right
after checking it?
thanks,
greg k-h
On Thu, Nov 23, 2023 at 08:44:46AM +0000, Greg Kroah-Hartman wrote:
> On Thu, Nov 23, 2023 at 04:49:22PM +0900, Yewon Choi wrote:
> > I think using locks can be considered orthogonal to correcting memory ordering.
>
> But they ensure proper memory ordering.
Yes, using a lock obviously ensures memory ordering.
> > If the lock is needed, we will need to add locks in all of them. I cannot be
> > sure which is better. Besides that, it seems to be a separate issue.
>
> Nope, I think it's the same issue :)
>
> > On the other hand, the current implementation doesn't guarantee memory ordering
> > which leads to wrong behavior.
> > This patch fixes this issue by adding primitives.
>
> But it's still wrong, again, what keeps the value from changing right
> after checking it?
It seems that VMCI assumes that vmci_host_dev->context is not NULL if
vmci_host_dev->ct_type == VMCIOBJ_CONTEXT (because all readers of
vmci_host_dev->context check whether vmci_host_dev->ct_type is
VMCIOBJ_CONTEXT or not, and access vmci_host_dev->context without
checking whether it is NULL or not). So I think this patch clarifies
this assumption.
As you said, we need to ensure that vmci_host_dev->context is not
changed after checking vmci_host_dev->ct_type. But
(1) the only place that changes vmci_host_dev->context is
vmci_host_close() and
(2) (I think) vmci_host_close() does not run concurrently with readers
of vmci_host_dev->context. IIUC, all readers of vmci_host_dev->context
are system calls (eg, ioctl handlers or the poll handler). So I think
the ref count of the file saves us here. (Otherwise, Syzkaller will
tell us the truth maybe?)
At least, this patch introduces no change to the logic other than the
memory ordering guarantees, so I think this patch is safe?
Best regards,
Dae R. Jeong
On Thu, Nov 23, 2023 at 07:06:52PM +0900, Dae R. Jeong wrote:
> It seems that VMCI assumes that vmci_host_dev->context is not NULL if
> vmci_host_dev->ct_type == VMCIOBJ_CONTEXT (because all readers of
> vmci_host_dev->context check whether vmci_host_dev->ct_type is
> VMCIOBJ_CONTEXT or not, and access vmci_host_dev->context without
> checking whether it is NULL or not). So I think this patch clarifies
> this assumption.
>
> As you said, we need to ensure that vmci_host_dev->context is not
> changed after checking vmci_host_dev->ct_type. But
> (1) the only place that changes vmci_host_dev->context is
> vmci_host_close() and
Then why is it even checked in close()?
> (2) (I think) vmci_host_close() does not run concurrently with readers
> of vmci_host_dev->context. IIUC, all readers of vmci_host_dev->context
> are system calls (eg, ioctl handlers or the poll handler). So I think
> the ref count of the file saves us here. (Otherwise, Syzkaller will
> tell us the truth maybe?)
Ok, then why does this need to be checked at all?
> At least, this patch introduces no change to the logic other than the
> memory ordering guarantees, so I think this patch is safe?
I think the logic is incorrect, don't try to paper over it thinking that
the issue to be solved is "memory ordering" please. Solve the root
issue here.
thanks,
greg k-h
On Thu, Nov 23, 2023 at 10:14:52AM +0000, Greg Kroah-Hartman wrote:
> On Thu, Nov 23, 2023 at 07:06:52PM +0900, Dae R. Jeong wrote:
> > It seems that VMCI assumes that vmci_host_dev->context is not NULL if
> > vmci_host_dev->ct_type == VMCIOBJ_CONTEXT (because all readers of
> > vmci_host_dev->context check whether vmci_host_dev->ct_type is
> > VMCIOBJ_CONTEXT or not, and access vmci_host_dev->context without
> > checking whether it is NULL or not). So I think this patch clarifies
> > this assumption.
> >
> > As you said, we need to ensure that vmci_host_dev->context is not
> > changed after checking vmci_host_dev->ct_type. But
> > (1) the only place that changes vmci_host_dev->context is
> > vmci_host_close() and
>
> Then why is it even checked in close()?
It is because close() needs to destroy vmci_host_dev->context if it has
been created.
> > (2) (I think) vmci_host_close() does not run concurrently with readers
> > of vmci_host_dev->context. IIUC, all readers of vmci_host_dev->context
> > are system calls (eg, ioctl handlers or the poll handler). So I think
> > the ref count of the file saves us here. (Otherwise, Syzkaller will
> > tell us the truth maybe?)
>
> Ok, then why does this need to be checked at all?
It is because vmci_host_dev->context is created by
ioctl(IOCTL_VMCI_INIT_CONTEXT). So it is possible that vmci_host_dev
is created but vmci_host_dev->context is *not* created. All other
places should be careful of this.
> > At least, this patch introduces no change to the logic other than the
> > memory ordering guarantees, so I think this patch is safe?
>
> I think the logic is incorrect, don't try to paper over it thinking that
> the issue to be solved is "memory ordering" please. Solve the root
> issue here.
I don't quite see what you think the root issue is.
We can have a system call sequence like this:
fd = open("/dev/vmci")
ioctl(fd, VMCI_VERSION2, user_version)
ioctl(fd, INIT_CONTEXT) // this somewhat depends on ioctl(VMCI_VERSION2) as it runs `context->user_version = user_version`
Between open() and ioctl(INIT_CONTEXT), we have vmci_host_dev
initialized but vmci_host_dev->context is not initialized. We need to
check whether vmci_host_dev->context is initialized in other
places. And I still think store_release/load_acquire can be used to
declare that context is created and check whether context is created
or not. Please excuse me if I'm wrong...
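Just to illustrate the window I mean, a rough userspace sketch
(hypothetical and untested; it assumes a local copy of the VMCI ioctl
definitions such as IOCTL_VMCI_VERSION2, IOCTL_VMCI_INIT_CONTEXT and
struct vmci_init_blk, since they are not exported through uapi):

  #include <fcntl.h>
  #include <poll.h>
  #include <pthread.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <unistd.h>

  #include "vmw_vmci_defs.h"   /* assumed local copy of the definitions */

  static void *poller(void *arg)
  {
          struct pollfd pfd = { .fd = *(int *)arg, .events = POLLIN };

          /*
           * This can run between open() and IOCTL_VMCI_INIT_CONTEXT in
           * main(), so vmci_host_poll() may observe ct_type and context
           * while they are still being set up.
           */
          poll(&pfd, 1, 100);
          return NULL;
  }

  int main(void)
  {
          unsigned int version = VMCI_VERSION;
          struct vmci_init_blk init_blk;
          pthread_t t;
          int fd;

          fd = open("/dev/vmci", O_RDWR);
          if (fd < 0)
                  return 1;

          pthread_create(&t, NULL, poller, &fd);

          memset(&init_blk, 0, sizeof(init_blk));
          ioctl(fd, IOCTL_VMCI_VERSION2, &version);      /* sets user_version */
          ioctl(fd, IOCTL_VMCI_INIT_CONTEXT, &init_blk); /* creates ->context */

          pthread_join(t, NULL);
          close(fd);
          return 0;
  }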
Best regards,
Dae R. Jeong
On Thu, Nov 23, 2023 at 08:08:29PM +0900, Dae R. Jeong wrote:
> I don't quite see what you think the root issue is.
>
> We can have a system call sequence like this:
> fd = open("/dev/vmci")
> ioctl(fd, VMCI_VERSION2, user_version)
> ioctl(fd, INIT_CONTEXT) // this somewhat depends on ioctl(VMCI_VERSION2) as it runs `context->user_version = user_version`
>
> Between open() and ioctl(INIT_CONTEXT), we have vmci_host_dev
> initialized but vmci_host_dev->context is not initialized. We need to
> check whether vmci_host_dev->context is initialized in other
> places. And I still think store_release/load_acquire can be used to
> declare that context is created and check whether context is created
> or not. Please excuse me if I'm wrong...
What race can happen here that you are trying to prevent? If you don't
think a lock is needed, because all you want to do is ensure that
context is set properly, then it doesn't matter if you race with a
different process on that check; that can always happen between
different processes and you can't guarantee anything there (even with a
lock, one of them will win first).
So I don't see the problem here, except for the complexity involved in
this code in trying to keep the state of the device in a local variable
that can be accessed by multiple callers at the same time, while
somehow expecting userspace to see it properly synced even when it
isn't trying to break things on purpose?
Either use a real lock because you want to guarantee ordering here, or
just leave it alone as it doesn't matter.
To be explicit, is this something that you are seeing in real workloads
for this driver? If so, what is the call trace that is happening and
who is in charge of that userspace code?
thanks,
greg k-h