2017-11-29 16:39:38

by Serhii Popovych

Subject: [PATCH 0/4] Fix use after free in HPT resizing code and related minor improvements

It is possible to trigger a use-after-free during HPT resizing
that causes the host kernel to crash. More details and analysis
of the problem can be found in the change with the corresponding
subject (KVM: PPC: Book3S HV: Fix use after free in case of
multiple resize requests).

We need some changes to prepare for the fix; in particular,
make ->error in the HPT resize instance the single point for
tracking allocation state, and improve kvmppc_allocate_hpt()
and kvmppc_free_hpt() so they can be used more safely.

See the individual commit descriptions for more information
on the changes presented.

Serhii Popovych (4):
KVM: PPC: Book3S HV: Drop prepare_done from struct kvm_resize_hpt and
cleanups
KVM: PPC: Book3S HV: Improve kvmppc_allocate_hpt()/kvmppc_free_hpt()
KVM: PPC: Book3S HV: Fix use after free in case of multiple resize
requests
KVM: PPC: Book3S HV: Remove redundant parameter from
resize_hpt_release()

arch/powerpc/kvm/book3s_64_mmu_hv.c | 139 +++++++++++++++++++++---------------
1 file changed, 82 insertions(+), 57 deletions(-)

--
1.8.3.1




2017-11-29 18:22:50

by Serhii Popovych

Subject: [PATCH 4/4] KVM: PPC: Book3S HV: Remove redundant parameter from resize_hpt_release()

There is no need to pass the kvm pointer explicitly from the
caller: struct kvm_resize_hpt already contains it.

An additional benefit of this change is that the BUG_ON()
assertion now checks that the mutex is held on the kvm instance
associated with the resize structure we are going to release.

Also kill the check for resize being NULL to make the code
simpler: we are called with resize != NULL everywhere except
kvm_vm_ioctl_resize_hpt_commit(), where the check is moved to
the caller.
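
For reference, the resize request already records its owning VM,
so the callee can recover it (a sketch of the structure; other
fields elided):

	struct kvm_resize_hpt {
		/* These fields read-only after init */
		struct kvm *kvm;
		struct work_struct work;
		u32 order;
		...
	};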

Signed-off-by: Serhii Popovych <[email protected]>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 690f061..a74a0ad 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1415,12 +1415,11 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
}

-static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
+static void resize_hpt_release(struct kvm_resize_hpt *resize)
{
- BUG_ON(!mutex_is_locked(&kvm->lock));
+ struct kvm *kvm = resize->kvm;

- if (!resize)
- return;
+ BUG_ON(!mutex_is_locked(&kvm->lock));

if (resize->error != -EBUSY) {
kvmppc_free_hpt(&resize->hpt);
@@ -1469,7 +1468,7 @@ static void resize_hpt_prepare_work(struct work_struct *work)
resize->error = err;

if (kvm->arch.resize_hpt != resize)
- resize_hpt_release(kvm, resize);
+ resize_hpt_release(resize);

mutex_unlock(&kvm->lock);
}
@@ -1499,13 +1498,13 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
if (ret == -EBUSY)
ret = 100; /* estimated time in ms */
else if (ret)
- resize_hpt_release(kvm, resize);
+ resize_hpt_release(resize);

goto out;
}

/* not suitable, cancel it */
- resize_hpt_release(kvm, resize);
+ resize_hpt_release(resize);
}

ret = 0;
@@ -1590,7 +1589,8 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
kvm->arch.mmu_ready = 1;
smp_mb();
out_no_hpt:
- resize_hpt_release(kvm, resize);
+ if (resize)
+ resize_hpt_release(resize);
mutex_unlock(&kvm->lock);
return ret;
}
--
1.8.3.1



2017-11-29 16:40:40

by Serhii Popovych

Subject: [PATCH 2/4] KVM: PPC: Book3S HV: Improve kvmppc_allocate_hpt()/kvmppc_free_hpt()

There are several points of improvement:

1) Make kvmppc_free_hpt() check whether an allocation was made
before attempting to release it. This follows kfree(p)
semantics, which permit p == NULL.

2) Return the @info parameter from kvmppc_allocate_hpt()
initialized even if the allocation fails.

This allows the caller to use kvmppc_free_hpt() without
checking that the preceding kvmppc_allocate_hpt() succeeded,
analogous to

	p = kmalloc(size, gfp);
	kfree(p);

which is correct for both p != NULL and p == NULL. A follow-up
change will rely on this behaviour; see the sketch after this
list.

3) Better code reuse: kvmppc_free_hpt() can be reused on the
error path in kvmppc_allocate_hpt() to avoid code duplication.

4) No need to check for !hpt when allocating from CMA: neither
pfn_to_kaddr() nor page_to_pfn() returns 0 when page != NULL.
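
Points 1) and 2) together make the pair safe to call
unconditionally; a minimal usage sketch (only the semantics
introduced by this patch are assumed):

	struct kvm_hpt_info info;
	int err;

	err = kvmppc_allocate_hpt(&info, order);
	if (!err) {
		/* ... use the HPT described by info ... */
	}
	kvmppc_free_hpt(&info);	/* safe for err == 0 and err != 0 alike */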

Signed-off-by: Serhii Popovych <[email protected]>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 54 ++++++++++++++++++-------------------
1 file changed, 26 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 0534aab..3e9abd9 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -82,47 +82,44 @@ struct kvm_resize_hpt {
int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
{
unsigned long hpt = 0;
- int cma = 0;
- struct page *page = NULL;
- struct revmap_entry *rev;
+ int err, cma = 0;
+ struct page *page;
+ struct revmap_entry *rev = NULL;
unsigned long npte;

+ err = -EINVAL;
if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER))
- return -EINVAL;
+ goto out;

+ err = -ENOMEM;
page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
memset((void *)hpt, 0, (1ul << order));
cma = 1;
- }
-
- if (!hpt)
+ } else {
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
|__GFP_NOWARN, order - PAGE_SHIFT);
-
- if (!hpt)
- return -ENOMEM;
+ if (!hpt)
+ goto out;
+ }

/* HPTEs are 2**4 bytes long */
npte = 1ul << (order - 4);

/* Allocate reverse map array */
- rev = vmalloc(sizeof(struct revmap_entry) * npte);
- if (!rev) {
- if (cma)
- kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
- else
- free_pages(hpt, order - PAGE_SHIFT);
- return -ENOMEM;
- }
-
+ rev = vmalloc(sizeof(*rev) * npte);
+ if (rev)
+ err = 0;
+out:
info->order = order;
info->virt = hpt;
info->cma = cma;
info->rev = rev;

- return 0;
+ if (err)
+ kvmppc_free_hpt(info);
+ return err;
}

void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
@@ -190,12 +187,14 @@ void kvmppc_free_hpt(struct kvm_hpt_info *info)
{
vfree(info->rev);
info->rev = NULL;
- if (info->cma)
- kvm_free_hpt_cma(virt_to_page(info->virt),
- 1 << (info->order - PAGE_SHIFT));
- else if (info->virt)
- free_pages(info->virt, info->order - PAGE_SHIFT);
- info->virt = 0;
+ if (info->virt) {
+ if (info->cma)
+ kvm_free_hpt_cma(virt_to_page(info->virt),
+ 1 << (info->order - PAGE_SHIFT));
+ else
+ free_pages(info->virt, info->order - PAGE_SHIFT);
+ info->virt = 0;
+ }
info->order = 0;
}

@@ -1423,8 +1422,7 @@ static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
if (!resize)
return;

- if (resize->hpt.virt)
- kvmppc_free_hpt(&resize->hpt);
+ kvmppc_free_hpt(&resize->hpt);

kvm->arch.resize_hpt = NULL;
kfree(resize);
--
1.8.3.1



2017-11-29 16:39:53

by Serhii Popovych

Subject: [PATCH 3/4] KVM: PPC: Book3S HV: Fix use after free in case of multiple resize requests

When serving multiple resize requests, the following could happen:

CPU0                                  CPU1
----                                  ----
kvm_vm_ioctl_resize_hpt_prepare(1);
  -> schedule_work()
                                      /* system_rq might be busy: delay */
kvm_vm_ioctl_resize_hpt_prepare(2);
  mutex_lock();
  if (resize) {
     ...
     release_hpt_resize();
  }
  ...                                 resize_hpt_prepare_work()
  -> schedule_work()                  {
  mutex_unlock()                         /* resize->kvm could be wrong */
                                         struct kvm *kvm = resize->kvm;

                                         mutex_lock(&kvm->lock);   <<<< UAF
                                         ...
                                      }

Since the scheduled work could be delayed (e.g. when the system
is busy), another resize request with a different order could be
promoted by kvm_vm_ioctl_resize_hpt_prepare(), and the previous
request could be freed right before resize_hpt_prepare_work()
begins execution, or right under its mutex_lock() when it runs
in parallel with the ioctl.

In both cases we get a use-after-free at the point marked UAF in
the diagram above.

To prevent this from happening, we must not release the previous
request immediately in the ioctl; instead, postpone the release
to resize_hpt_prepare_work().

Make resize_hpt_release() generic: it is now used in more cases.
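
The resulting ownership rule can be stated compactly (a sketch in
terms of the names used in this series; the actual code is in the
hunks below):

	/*
	 * resize->error == -EBUSY: the worker still owns the request,
	 * so resize_hpt_release() only detaches it from
	 * kvm->arch.resize_hpt and leaves the free to the worker.
	 *
	 * resize->error != -EBUSY: the worker has finished, and the
	 * last caller of resize_hpt_release() frees the request.
	 */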

This fixes the following (or a similar) host kernel crash:

[ 635.277361] Unable to handle kernel paging request for data at address 0x00000000
[ 635.277438] Faulting instruction address: 0xc00000000052f568
[ 635.277446] Oops: Kernel access of bad area, sig: 11 [#1]
[ 635.277451] SMP NR_CPUS=2048 NUMA PowerNV
[ 635.277470] Modules linked in: xt_CHECKSUM iptable_mangle ipt_MASQUERADE
nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4
nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc
ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter nfsv3 nfs_acl nfs
lockd grace fscache kvm_hv kvm rpcrdma sunrpc ib_isert iscsi_target_mod ib_iser libiscsi
scsi_transport_iscsi ib_srpt target_core_mod ext4 ib_srp scsi_transport_srp
ib_ipoib mbcache jbd2 rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ocrdma(T)
ib_core ses enclosure scsi_transport_sas sg shpchp leds_powernv ibmpowernv i2c_opal
i2c_core powernv_rng ipmi_powernv ipmi_devintf ipmi_msghandler ip_tables xfs
libcrc32c sr_mod sd_mod cdrom lpfc nvme_fc(T) nvme_fabrics nvme_core ipr nvmet_fc(T)
tg3 nvmet libata be2net crc_t10dif crct10dif_generic scsi_transport_fc ptp scsi_tgt
pps_core crct10dif_common dm_mirror dm_region_hash dm_log dm_mod
[ 635.278687] CPU: 40 PID: 749 Comm: kworker/40:1 Tainted: G
------------ T 3.10.0.bz1510771+ #1
[ 635.278782] Workqueue: events resize_hpt_prepare_work [kvm_hv]
[ 635.278851] task: c0000007e6840000 ti: c0000007e9180000 task.ti: c0000007e9180000
[ 635.278919] NIP: c00000000052f568 LR: c0000000009ea310 CTR: c0000000009ea4f0
[ 635.278988] REGS: c0000007e91837f0 TRAP: 0300 Tainted: G
------------ T (3.10.0.bz1510771+)
[ 635.279077] MSR: 9000000100009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 24002022 XER:
00000000
[ 635.279248] CFAR: c000000000009368 DAR: 0000000000000000 DSISR: 40000000 SOFTE: 1
GPR00: c0000000009ea310 c0000007e9183a70 c000000001250b00 c0000007e9183b10
GPR04: 0000000000000000 0000000000000000 c0000007e9183650 0000000000000000
GPR08: c0000007ffff7b80 00000000ffffffff 0000000080000028 d00000000d2529a0
GPR12: 0000000000002200 c000000007b56800 c000000000120028 c0000007f135bb40
GPR16: 0000000000000000 c000000005c1e018 c000000005c1e018 0000000000000000
GPR20: 0000000000000001 c0000000011bf778 0000000000000001 fffffffffffffef7
GPR24: 0000000000000000 c000000f1e262e50 0000000000000002 c0000007e9180000
GPR28: c000000f1e262e4c c000000f1e262e50 0000000000000000 c0000007e9183b10
[ 635.280149] NIP [c00000000052f568] __list_add+0x38/0x110
[ 635.280197] LR [c0000000009ea310] __mutex_lock_slowpath+0xe0/0x2c0
[ 635.280253] Call Trace:
[ 635.280277] [c0000007e9183af0] [c0000000009ea310] __mutex_lock_slowpath+0xe0/0x2c0
[ 635.280356] [c0000007e9183b70] [c0000000009ea554] mutex_lock+0x64/0x70
[ 635.280426] [c0000007e9183ba0] [d00000000d24da04]
resize_hpt_prepare_work+0xe4/0x1c0 [kvm_hv]
[ 635.280507] [c0000007e9183c40] [c000000000113c0c] process_one_work+0x1dc/0x680
[ 635.280587] [c0000007e9183ce0] [c000000000114250] worker_thread+0x1a0/0x520
[ 635.280655] [c0000007e9183d80] [c00000000012010c] kthread+0xec/0x100
[ 635.280724] [c0000007e9183e30] [c00000000000a4b8] ret_from_kernel_thread+0x5c/0xa4
[ 635.280814] Instruction dump:
[ 635.280880] 7c0802a6 fba1ffe8 fbc1fff0 7cbd2b78 fbe1fff8 7c9e2378 7c7f1b78
f8010010
[ 635.281099] f821ff81 e8a50008 7fa52040 40de00b8 <e8be0000> 7fbd2840 40de008c
7fbff040
[ 635.281324] ---[ end trace b628b73449719b9d ]---

Signed-off-by: Serhii Popovych <[email protected]>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 45 ++++++++++++++++++++++++++-----------
1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 3e9abd9..690f061 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1417,15 +1417,18 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)

static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
{
- BUG_ON(kvm->arch.resize_hpt != resize);
+ BUG_ON(!mutex_is_locked(&kvm->lock));

if (!resize)
return;

- kvmppc_free_hpt(&resize->hpt);
+ if (resize->error != -EBUSY) {
+ kvmppc_free_hpt(&resize->hpt);
+ kfree(resize);
+ }

- kvm->arch.resize_hpt = NULL;
- kfree(resize);
+ if (kvm->arch.resize_hpt == resize)
+ kvm->arch.resize_hpt = NULL;
}

static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1434,24 +1437,40 @@ static void resize_hpt_prepare_work(struct work_struct *work)
struct kvm_resize_hpt,
work);
struct kvm *kvm = resize->kvm;
- int err;
+ int err = 0;

BUG_ON(resize->error != -EBUSY);

- resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
- resize->order);
+ mutex_lock(&kvm->lock);
+
+ /* Request is still current? */
+ if (kvm->arch.resize_hpt == resize) {
+ /* We may request large allocations here:
+ * do not sleep with kvm->lock held for a while.
+ */
+ mutex_unlock(&kvm->lock);

- err = resize_hpt_allocate(resize);
+ resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+ resize->order);

- /* We have strict assumption about -EBUSY
- * when preparing for HPT resize.
- */
- BUG_ON(err == -EBUSY);
+ err = resize_hpt_allocate(resize);

- mutex_lock(&kvm->lock);
+ /* We have strict assumption about -EBUSY
+ * when preparing for HPT resize.
+ */
+ BUG_ON(err == -EBUSY);
+
+ mutex_lock(&kvm->lock);
+ /* It is possible that kvm->arch.resize_hpt != resize
+ * after we grab kvm->lock again.
+ */
+ }

resize->error = err;

+ if (kvm->arch.resize_hpt != resize)
+ resize_hpt_release(kvm, resize);
+
mutex_unlock(&kvm->lock);
}

--
1.8.3.1



2017-11-29 16:39:51

by Serhii Popovych

Subject: [PATCH 1/4] KVM: PPC: Book3S HV: Drop prepare_done from struct kvm_resize_hpt and cleanups

Replace the ->prepare_done flag with special handling of -EBUSY
in ->error as an indicator that the allocation work is running.

Besides cosmetics, this reduces the size of struct kvm_resize_hpt
by __alignof__(struct kvm_hpt_info) and saves a few bytes of code.

While there, correct the comment in struct kvm_resize_hpt about
the locking used to protect access to certain fields.

Assert with BUG_ON() in case the HPT allocation work runs more
than once for a resize request, or resize_hpt_allocate() returns
-EBUSY, which is treated specially.

Change comparisons against zero to make checkpatch.pl happy.
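
With this change, ->error follows a simple lifecycle (a sketch of
the convention introduced here; the actual assignments appear in
the hunks below):

	resize->error = -EBUSY;	/* kvm_vm_ioctl_resize_hpt_prepare():
				 * request created, work queued */
	resize->error = err;	/* resize_hpt_prepare_work(): allocation
				 * finished, err is never -EBUSY */
	ret = resize->error;	/* readers treat -EBUSY as "still in
				 * progress", anything else as final */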

Signed-off-by: Serhii Popovych <[email protected]>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 42 ++++++++++++++++++++++---------------
1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 235319c..0534aab 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
u32 order;

/* These fields protected by kvm->lock */
+
+ /* Possible values and their usage:
+ * <0 an error occurred during allocation,
+ * -EBUSY allocation is in progress,
+ * 0 allocation completed successfully.
+ */
int error;
- bool prepare_done;

- /* Private to the work thread, until prepare_done is true,
- * then protected by kvm->resize_hpt_sem */
+ /* Private to the work thread, until error != -EBUSY,
+ * then protected by kvm->lock.
+ */
struct kvm_hpt_info hpt;
};

@@ -1432,15 +1438,21 @@ static void resize_hpt_prepare_work(struct work_struct *work)
struct kvm *kvm = resize->kvm;
int err;

+ BUG_ON(resize->error != -EBUSY);
+
resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
resize->order);

err = resize_hpt_allocate(resize);

+ /* We have strict assumption about -EBUSY
+ * when preparing for HPT resize.
+ */
+ BUG_ON(err == -EBUSY);
+
mutex_lock(&kvm->lock);

resize->error = err;
- resize->prepare_done = true;

mutex_unlock(&kvm->lock);
}
@@ -1465,14 +1477,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,

if (resize) {
if (resize->order == shift) {
- /* Suitable resize in progress */
- if (resize->prepare_done) {
- ret = resize->error;
- if (ret != 0)
- resize_hpt_release(kvm, resize);
- } else {
+ /* Suitable resize in progress? */
+ ret = resize->error;
+ if (ret == -EBUSY)
ret = 100; /* estimated time in ms */
- }
+ else if (ret)
+ resize_hpt_release(kvm, resize);

goto out;
}
@@ -1492,6 +1502,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
ret = -ENOMEM;
goto out;
}
+
+ resize->error = -EBUSY;
resize->order = shift;
resize->kvm = kvm;
INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1546,16 +1558,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
if (!resize || (resize->order != shift))
goto out;

- ret = -EBUSY;
- if (!resize->prepare_done)
- goto out;
-
ret = resize->error;
- if (ret != 0)
+ if (ret)
goto out;

ret = resize_hpt_rehash(resize);
- if (ret != 0)
+ if (ret)
goto out;

resize_hpt_pivot(resize);
--
1.8.3.1

