KASAN learns about hot-added memory via the memory hotplug notifier.
devm_memremap_pages() intentionally skips calling the memory hotplug
notifiers, so KASAN doesn't know anything about new memory added
by devm_memremap_pages(). This causes a crash when KASAN tries to
access the non-existent shadow memory:
BUG: unable to handle kernel paging request at ffffed0078000000
RIP: 0010:check_memory_region+0x82/0x1e0
Call Trace:
memcpy+0x1f/0x50
pmem_do_bvec+0x163/0x720
pmem_make_request+0x305/0xac0
generic_make_request+0x54f/0xcf0
submit_bio+0x9c/0x370
submit_bh_wbc+0x4c7/0x700
block_read_full_page+0x5ef/0x870
do_read_cache_page+0x2b8/0xb30
read_dev_sector+0xbd/0x3f0
read_lba.isra.0+0x277/0x670
efi_partition+0x41a/0x18f0
check_partition+0x30d/0x5e9
rescan_partitions+0x18c/0x840
__blkdev_get+0x859/0x1060
blkdev_get+0x23f/0x810
__device_add_disk+0x9c8/0xde0
pmem_attach_disk+0x9a8/0xf50
nvdimm_bus_probe+0xf3/0x3c0
driver_probe_device+0x493/0xbd0
bus_for_each_drv+0x118/0x1b0
__device_attach+0x1cd/0x2b0
bus_probe_device+0x1ac/0x260
device_add+0x90d/0x1380
nd_async_device_register+0xe/0x50
async_run_entry_fn+0xc3/0x5d0
process_one_work+0xa0a/0x1810
worker_thread+0x87/0xe80
kthread+0x2d7/0x390
ret_from_fork+0x3a/0x50
Add kasan_add_zero_shadow()/kasan_remove_zero_shadow() - a post-mm_init()
interface to map/unmap kasan_zero_page at requested virtual addresses.
Use it to add/remove the shadow memory for hotplugged/unplugged
device memory.
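A minimal usage sketch (it mirrors the devm_memremap_pages() hunks below;
the error label is only illustrative). Both the start address and the size
have to be aligned to KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE, otherwise the
interface WARNs and bails out:

	/* hot-add: back the shadow of the new range with kasan_zero_page */
	error = kasan_add_zero_shadow(__va(align_start), align_size);
	if (error)
		goto err_kasan;

	error = arch_add_memory(nid, align_start, align_size, altmap, false);
	if (error)
		kasan_remove_zero_shadow(__va(align_start), align_size);

	/* hot-remove: tear the shadow mapping down again */
	kasan_remove_zero_shadow(__va(align_start), align_size);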
Reported-by: Dave Chinner <[email protected]>
Signed-off-by: Andrey Ryabinin <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Alexander Potapenko <[email protected]>
---
include/linux/kasan.h | 13 ++-
kernel/memremap.c | 10 ++
mm/kasan/kasan_init.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 325 insertions(+), 14 deletions(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index de784fd11d12..46aae129917c 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -20,7 +20,7 @@ extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
extern pud_t kasan_zero_pud[PTRS_PER_PUD];
extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];
-void kasan_populate_zero_shadow(const void *shadow_start,
+int kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end);
static inline void *kasan_mem_to_shadow(const void *addr)
@@ -71,6 +71,9 @@ struct kasan_cache {
int kasan_module_alloc(void *addr, size_t size);
void kasan_free_shadow(const struct vm_struct *vm);
+int kasan_add_zero_shadow(void *start, unsigned long size);
+void kasan_remove_zero_shadow(void *start, unsigned long size);
+
size_t ksize(const void *);
static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
size_t kasan_metadata_size(struct kmem_cache *cache);
@@ -124,6 +127,14 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
+static inline int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+ return 0;
+}
+static inline void kasan_remove_zero_shadow(void *start,
+ unsigned long size)
+{}
+
static inline void kasan_unpoison_slab(const void *ptr) { }
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 5857267a4af5..172264bf5812 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/pfn_t.h>
#include <linux/io.h>
+#include <linux/kasan.h>
#include <linux/mm.h>
#include <linux/memory_hotplug.h>
#include <linux/swap.h>
@@ -137,6 +138,7 @@ static void devm_memremap_pages_release(void *data)
mem_hotplug_begin();
arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
&pgmap->altmap : NULL);
+ kasan_remove_zero_shadow(__va(align_start), align_size);
mem_hotplug_done();
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
@@ -223,6 +225,12 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
goto err_pfn_remap;
mem_hotplug_begin();
+ error = kasan_add_zero_shadow(__va(align_start), align_size);
+ if (error) {
+ mem_hotplug_done();
+ goto err_kasan;
+ }
+
error = arch_add_memory(nid, align_start, align_size, altmap, false);
if (!error)
move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
@@ -251,6 +259,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
return __va(res->start);
err_add_memory:
+ kasan_remove_zero_shadow(__va(align_start), align_size);
+ err_kasan:
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
err_pfn_remap:
err_radix:
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index f436246ccc79..3ae77df8f414 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -21,6 +21,8 @@
#include <asm/page.h>
#include <asm/pgalloc.h>
+#include "kasan.h"
+
/*
* This page serves two purposes:
* - It used as early shadow memory. The entire shadow region populated
@@ -32,22 +34,59 @@ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
#if CONFIG_PGTABLE_LEVELS > 4
p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
+static inline bool kasan_p4d_table(pgd_t pgd)
+{
+ return __pa(pgd_page_vaddr(pgd)) == __pa_symbol(kasan_zero_p4d);
+}
+#else
+static inline bool kasan_p4d_table(pgd_t pgd)
+{
+ return 0;
+}
#endif
#if CONFIG_PGTABLE_LEVELS > 3
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+ return __pa(p4d_page_vaddr(p4d)) == __pa_symbol(kasan_zero_pud);
+}
+#else
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+ return 0;
+}
#endif
#if CONFIG_PGTABLE_LEVELS > 2
pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static inline bool kasan_pmd_table(pud_t pud)
+{
+ return __pa(pud_page_vaddr(pud)) == __pa_symbol(kasan_zero_pmd);
+}
+#else
+static inline bool kasan_pmd_table(pud_t pud)
+{
+ return 0;
+}
#endif
pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss;
+static inline bool kasan_pte_table(pmd_t pmd)
+{
+ return __pa(pmd_page_vaddr(pmd)) == __pa_symbol(kasan_zero_pte);
+}
+
+static inline bool kasan_zero_page_entry(pte_t pte)
+{
+ return pte_pfn(pte) == PHYS_PFN(__pa_symbol(kasan_zero_page));
+}
+
static __init void *early_alloc(size_t size, int node)
{
return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
BOOTMEM_ALLOC_ACCESSIBLE, node);
}
-static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __ref zero_pte_populate(pmd_t *pmd, unsigned long addr,
unsigned long end)
{
pte_t *pte = pte_offset_kernel(pmd, addr);
@@ -63,7 +102,7 @@ static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
}
}
-static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
+static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
unsigned long end)
{
pmd_t *pmd = pmd_offset(pud, addr);
@@ -78,14 +117,24 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
}
if (pmd_none(*pmd)) {
- pmd_populate_kernel(&init_mm, pmd,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ pte_t *p;
+
+ if (slab_is_available())
+ p = pte_alloc_one_kernel(&init_mm, addr);
+ else
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ if (!p)
+ return -ENOMEM;
+
+ pmd_populate_kernel(&init_mm, pmd, p);
}
zero_pte_populate(pmd, addr, next);
} while (pmd++, addr = next, addr != end);
+
+ return 0;
}
-static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
+static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
unsigned long end)
{
pud_t *pud = pud_offset(p4d, addr);
@@ -103,14 +152,25 @@ static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
}
if (pud_none(*pud)) {
- pud_populate(&init_mm, pud,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ pmd_t *p;
+
+ if (slab_is_available())
+ p = pmd_alloc_one(&init_mm, addr);
+ else
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+
+ if (!p)
+ return -ENOMEM;
+
+ pud_populate(&init_mm, pud, p);
}
zero_pmd_populate(pud, addr, next);
} while (pud++, addr = next, addr != end);
+
+ return 0;
}
-static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
+static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
unsigned long end)
{
p4d_t *p4d = p4d_offset(pgd, addr);
@@ -132,11 +192,21 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
}
if (p4d_none(*p4d)) {
- p4d_populate(&init_mm, p4d,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ pud_t *p;
+
+ if (slab_is_available())
+ p = pud_alloc_one(&init_mm, addr);
+ else
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ if (!p)
+ return -ENOMEM;
+
+ p4d_populate(&init_mm, p4d, p);
}
zero_pud_populate(p4d, addr, next);
} while (p4d++, addr = next, addr != end);
+
+ return 0;
}
/**
@@ -145,7 +215,7 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
* @shadow_start - start of the memory range to populate
* @shadow_end - end of the memory range to populate
*/
-void __init kasan_populate_zero_shadow(const void *shadow_start,
+int __ref kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end)
{
unsigned long addr = (unsigned long)shadow_start;
@@ -191,9 +261,229 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
}
if (pgd_none(*pgd)) {
- pgd_populate(&init_mm, pgd,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ p4d_t *p;
+
+ if (slab_is_available())
+ p = p4d_alloc_one(&init_mm, addr);
+ else
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ if (!p)
+ return -ENOMEM;
+
+ pgd_populate(&init_mm, pgd, p);
}
zero_p4d_populate(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
+
+ return 0;
+}
+
+static void kasan_free_pte(pte_t *pte_start, pmd_t *pmd)
+{
+ pte_t *pte;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte = pte_start + i;
+ if (!pte_none(*pte))
+ return;
+ }
+
+ pte_free_kernel(&init_mm, (pte_t *)pmd_page_vaddr(*pmd));
+ pmd_clear(pmd);
+}
+
+static void kasan_free_pmd(pmd_t *pmd_start, pud_t *pud)
+{
+ pmd_t *pmd;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd = pmd_start + i;
+ if (!pmd_none(*pmd))
+ return;
+ }
+
+ pmd_free(&init_mm, (pmd_t *)pud_page_vaddr(*pud));
+ pud_clear(pud);
+}
+
+static void kasan_free_pud(pud_t *pud_start, p4d_t *p4d)
+{
+ pud_t *pud;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ pud = pud_start + i;
+ if (!pud_none(*pud))
+ return;
+ }
+
+ pud_free(&init_mm, (pud_t *)p4d_page_vaddr(*p4d));
+ p4d_clear(p4d);
+}
+
+static void kasan_free_p4d(p4d_t *p4d_start, pgd_t *pgd)
+{
+ p4d_t *p4d;
+ int i;
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ p4d = p4d_start + i;
+ if (!p4d_none(*p4d))
+ return;
+ }
+
+ p4d_free(&init_mm, (p4d_t *)pgd_page_vaddr(*pgd));
+ pgd_clear(pgd);
+}
+
+static void kasan_remove_pte_table(pte_t *pte, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next, pte++) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ if (next > end)
+ next = end;
+
+ if (!pte_present(*pte))
+ continue;
+
+ if (WARN_ON(!kasan_zero_page_entry(*pte)))
+ continue;
+ pte_clear(&init_mm, addr, pte);
+ }
+}
+
+static void kasan_remove_pmd_table(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next, pmd++) {
+ pte_t *pte;
+
+ next = pmd_addr_end(addr, end);
+
+ if (!pmd_present(*pmd))
+ continue;
+
+ if (kasan_pte_table(*pmd)) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE))
+ pmd_clear(pmd);
+ continue;
+ }
+ pte = pte_offset_kernel(pmd, addr);
+ kasan_remove_pte_table(pte, addr, next);
+ kasan_free_pte(pte_offset_kernel(pmd, 0), pmd);
+ }
+}
+
+static void kasan_remove_pud_table(pud_t *pud, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next, pud++) {
+ pmd_t *pmd, *pmd_base;
+
+ next = pud_addr_end(addr, end);
+
+ if (!pud_present(*pud))
+ continue;
+
+ if (kasan_pmd_table(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE))
+ pud_clear(pud);
+ continue;
+ }
+ pmd = pmd_offset(pud, addr);
+ pmd_base = pmd_offset(pud, 0);
+ kasan_remove_pmd_table(pmd, addr, next);
+ kasan_free_pmd(pmd_base, pud);
+ }
+}
+
+static void kasan_remove_p4d_table(p4d_t *p4d, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next, p4d++) {
+ pud_t *pud;
+
+ next = p4d_addr_end(addr, end);
+
+ if (!p4d_present(*p4d))
+ continue;
+
+ if (kasan_pud_table(*p4d)) {
+ if (IS_ALIGNED(addr, P4D_SIZE) &&
+ IS_ALIGNED(next, P4D_SIZE))
+ p4d_clear(p4d);
+ continue;
+ }
+ pud = pud_offset(p4d, addr);
+ kasan_remove_pud_table(pud, addr, next);
+ kasan_free_pud(pud_offset(p4d, 0), p4d);
+ }
+}
+
+void kasan_remove_zero_shadow(void *start, unsigned long size)
+{
+ unsigned long addr, end, next;
+ pgd_t *pgd;
+
+ addr = (unsigned long)kasan_mem_to_shadow(start);
+ end = addr + (size >> KASAN_SHADOW_SCALE_SHIFT);
+
+ if (WARN_ON((unsigned long)start %
+ (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
+ WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+ return;
+
+ for (; addr < end; addr = next) {
+ p4d_t *p4d;
+
+ next = pgd_addr_end(addr, end);
+
+ pgd = pgd_offset_k(addr);
+ if (!pgd_present(*pgd))
+ continue;
+
+ if (kasan_p4d_table(*pgd)) {
+ if (IS_ALIGNED(addr, PGDIR_SIZE) &&
+ IS_ALIGNED(next, PGDIR_SIZE))
+ pgd_clear(pgd);
+ continue;
+ }
+
+ p4d = p4d_offset(pgd, addr);
+ kasan_remove_p4d_table(p4d, addr, next);
+ kasan_free_p4d(p4d_offset(pgd, 0), pgd);
+ }
+}
+
+int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+ int ret;
+ void *shadow_start, *shadow_end;
+
+ shadow_start = kasan_mem_to_shadow(start);
+ shadow_end = shadow_start + (size >> KASAN_SHADOW_SCALE_SHIFT);
+
+ if (WARN_ON((unsigned long)start %
+ (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
+ WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+ return -EINVAL;
+
+ ret = kasan_populate_zero_shadow(shadow_start, shadow_end);
+ if (ret)
+ kasan_remove_zero_shadow(shadow_start,
+ size >> KASAN_SHADOW_SCALE_SHIFT);
+ return ret;
}
--
2.16.4
Hi Andrey,
I love your patch! Yet something to improve:
[auto build test ERROR on linus/master]
[also build test ERROR on v4.18-rc2]
[cannot apply to next-20180625]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Andrey-Ryabinin/kernel-memremap-kasan-Make-ZONE_DEVICE-with-work-with-KASAN/20180626-023131
config: xtensa-allyesconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 8.1.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=8.1.0 make.cross ARCH=xtensa
All error/warnings (new ones prefixed by >>):
mm/kasan/kasan_init.c: In function 'kasan_populate_zero_shadow':
>> mm/kasan/kasan_init.c:267:9: error: implicit declaration of function 'p4d_alloc_one'; did you mean 'pud_alloc_one'? [-Werror=implicit-function-declaration]
p = p4d_alloc_one(&init_mm, addr);
^~~~~~~~~~~~~
pud_alloc_one
>> mm/kasan/kasan_init.c:267:7: warning: assignment to 'pgd_t *' {aka 'struct <anonymous> *'} from 'int' makes pointer from integer without a cast [-Wint-conversion]
p = p4d_alloc_one(&init_mm, addr);
^
cc1: some warnings being treated as errors
vim +267 mm/kasan/kasan_init.c
211
212 /**
213 * kasan_populate_zero_shadow - populate shadow memory region with
214 * kasan_zero_page
215 * @shadow_start - start of the memory range to populate
216 * @shadow_end - end of the memory range to populate
217 */
218 int __ref kasan_populate_zero_shadow(const void *shadow_start,
219 const void *shadow_end)
220 {
221 unsigned long addr = (unsigned long)shadow_start;
222 unsigned long end = (unsigned long)shadow_end;
223 pgd_t *pgd = pgd_offset_k(addr);
224 unsigned long next;
225
226 do {
227 next = pgd_addr_end(addr, end);
228
229 if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
230 p4d_t *p4d;
231 pud_t *pud;
232 pmd_t *pmd;
233
234 /*
235 * kasan_zero_pud should be populated with pmds
236 * at this moment.
237 * [pud,pmd]_populate*() below needed only for
238 * 3,2 - level page tables where we don't have
239 * puds,pmds, so pgd_populate(), pud_populate()
240 * is noops.
241 *
242 * The ifndef is required to avoid build breakage.
243 *
244 * With 5level-fixup.h, pgd_populate() is not nop and
245 * we reference kasan_zero_p4d. It's not defined
246 * unless 5-level paging enabled.
247 *
248 * The ifndef can be dropped once all KASAN-enabled
249 * architectures will switch to pgtable-nop4d.h.
250 */
251 #ifndef __ARCH_HAS_5LEVEL_HACK
252 pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_p4d));
253 #endif
254 p4d = p4d_offset(pgd, addr);
255 p4d_populate(&init_mm, p4d, lm_alias(kasan_zero_pud));
256 pud = pud_offset(p4d, addr);
257 pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd));
258 pmd = pmd_offset(pud, addr);
259 pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
260 continue;
261 }
262
263 if (pgd_none(*pgd)) {
264 p4d_t *p;
265
266 if (slab_is_available())
> 267 p = p4d_alloc_one(&init_mm, addr);
268 else
269 p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
270 if (!p)
271 return -ENOMEM;
272
273 pgd_populate(&init_mm, pgd, p);
274 }
275 zero_p4d_populate(pgd, addr, next);
276 } while (pgd++, addr = next, addr != end);
277
278 return 0;
279 }
280
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Andrey,
I love your patch! Yet something to improve:
[auto build test ERROR on linus/master]
[also build test ERROR on v4.18-rc2]
[cannot apply to next-20180625]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Andrey-Ryabinin/kernel-memremap-kasan-Make-ZONE_DEVICE-with-work-with-KASAN/20180626-023131
config: arm64-allmodconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=arm64
All error/warnings (new ones prefixed by >>):
In file included from arch/arm64/include/asm/thread_info.h:30:0,
from include/linux/thread_info.h:38,
from include/asm-generic/preempt.h:5,
from ./arch/arm64/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:81,
from include/linux/spinlock.h:51,
from include/linux/mmzone.h:8,
from include/linux/bootmem.h:8,
from mm/kasan/kasan_init.c:13:
mm/kasan/kasan_init.c: In function 'kasan_pmd_table':
>> mm/kasan/kasan_init.c:63:14: error: implicit declaration of function 'pud_page_vaddr'; did you mean 'pud_page_paddr'? [-Werror=implicit-function-declaration]
return __pa(pud_page_vaddr(pud)) == __pa_symbol(kasan_zero_pmd);
^
arch/arm64/include/asm/memory.h:270:50: note: in definition of macro '__pa'
#define __pa(x) __virt_to_phys((unsigned long)(x))
^
mm/kasan/kasan_init.c: In function 'kasan_pte_table':
>> mm/kasan/kasan_init.c:75:14: error: implicit declaration of function 'pmd_page_vaddr'; did you mean 'pmd_page_paddr'? [-Werror=implicit-function-declaration]
return __pa(pmd_page_vaddr(pmd)) == __pa_symbol(kasan_zero_pte);
^
arch/arm64/include/asm/memory.h:270:50: note: in definition of macro '__pa'
#define __pa(x) __virt_to_phys((unsigned long)(x))
^
mm/kasan/kasan_init.c: In function 'zero_pmd_populate':
mm/kasan/kasan_init.c:122:8: error: implicit declaration of function 'slab_is_available'; did you mean 'si_mem_available'? [-Werror=implicit-function-declaration]
if (slab_is_available())
^~~~~~~~~~~~~~~~~
si_mem_available
mm/kasan/kasan_init.c: In function 'kasan_populate_zero_shadow':
>> mm/kasan/kasan_init.c:267:9: error: implicit declaration of function 'p4d_alloc_one'; did you mean 'pmd_alloc_one'? [-Werror=implicit-function-declaration]
p = p4d_alloc_one(&init_mm, addr);
^~~~~~~~~~~~~
pmd_alloc_one
>> mm/kasan/kasan_init.c:267:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
p = p4d_alloc_one(&init_mm, addr);
^
mm/kasan/kasan_init.c: In function 'kasan_free_pte':
>> mm/kasan/kasan_init.c:292:28: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
pte_free_kernel(&init_mm, (pte_t *)pmd_page_vaddr(*pmd));
^
mm/kasan/kasan_init.c: In function 'kasan_free_pmd':
mm/kasan/kasan_init.c:307:21: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
pmd_free(&init_mm, (pmd_t *)pud_page_vaddr(*pud));
^
cc1: some warnings being treated as errors
vim +63 mm/kasan/kasan_init.c
> 13 #include <linux/bootmem.h>
14 #include <linux/init.h>
15 #include <linux/kasan.h>
16 #include <linux/kernel.h>
17 #include <linux/memblock.h>
18 #include <linux/mm.h>
19 #include <linux/pfn.h>
20
21 #include <asm/page.h>
22 #include <asm/pgalloc.h>
23
24 #include "kasan.h"
25
26 /*
27 * This page serves two purposes:
28 * - It used as early shadow memory. The entire shadow region populated
29 * with this page, before we will be able to setup normal shadow memory.
30 * - Latter it reused it as zero shadow to cover large ranges of memory
31 * that allowed to access, but not handled by kasan (vmalloc/vmemmap ...).
32 */
33 unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
34
35 #if CONFIG_PGTABLE_LEVELS > 4
36 p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
37 static inline bool kasan_p4d_table(pgd_t pgd)
38 {
39 return __pa(pgd_page_vaddr(pgd)) == __pa_symbol(kasan_zero_p4d);
40 }
41 #else
42 static inline bool kasan_p4d_table(pgd_t pgd)
43 {
44 return 0;
45 }
46 #endif
47 #if CONFIG_PGTABLE_LEVELS > 3
48 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
49 static inline bool kasan_pud_table(p4d_t p4d)
50 {
51 return __pa(p4d_page_vaddr(p4d)) == __pa_symbol(kasan_zero_pud);
52 }
53 #else
54 static inline bool kasan_pud_table(p4d_t p4d)
55 {
56 return 0;
57 }
58 #endif
59 #if CONFIG_PGTABLE_LEVELS > 2
60 pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss;
61 static inline bool kasan_pmd_table(pud_t pud)
62 {
> 63 return __pa(pud_page_vaddr(pud)) == __pa_symbol(kasan_zero_pmd);
64 }
65 #else
66 static inline bool kasan_pmd_table(pud_t pud)
67 {
68 return 0;
69 }
70 #endif
71 pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss;
72
73 static inline bool kasan_pte_table(pmd_t pmd)
74 {
> 75 return __pa(pmd_page_vaddr(pmd)) == __pa_symbol(kasan_zero_pte);
76 }
77
78 static inline bool kasan_zero_page_entry(pte_t pte)
79 {
80 return pte_pfn(pte) == PHYS_PFN(__pa_symbol(kasan_zero_page));
81 }
82
83 static __init void *early_alloc(size_t size, int node)
84 {
85 return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
86 BOOTMEM_ALLOC_ACCESSIBLE, node);
87 }
88
89 static void __ref zero_pte_populate(pmd_t *pmd, unsigned long addr,
90 unsigned long end)
91 {
92 pte_t *pte = pte_offset_kernel(pmd, addr);
93 pte_t zero_pte;
94
95 zero_pte = pfn_pte(PFN_DOWN(__pa_symbol(kasan_zero_page)), PAGE_KERNEL);
96 zero_pte = pte_wrprotect(zero_pte);
97
98 while (addr + PAGE_SIZE <= end) {
99 set_pte_at(&init_mm, addr, pte, zero_pte);
100 addr += PAGE_SIZE;
101 pte = pte_offset_kernel(pmd, addr);
102 }
103 }
104
105 static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
106 unsigned long end)
107 {
108 pmd_t *pmd = pmd_offset(pud, addr);
109 unsigned long next;
110
111 do {
112 next = pmd_addr_end(addr, end);
113
114 if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
115 pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte));
116 continue;
117 }
118
119 if (pmd_none(*pmd)) {
120 pte_t *p;
121
> 122 if (slab_is_available())
123 p = pte_alloc_one_kernel(&init_mm, addr);
124 else
125 p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
126 if (!p)
127 return -ENOMEM;
128
129 pmd_populate_kernel(&init_mm, pmd, p);
130 }
131 zero_pte_populate(pmd, addr, next);
132 } while (pmd++, addr = next, addr != end);
133
134 return 0;
135 }
136
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
KASAN learns about hot-added memory via the memory hotplug notifier.
devm_memremap_pages() intentionally skips calling the memory hotplug
notifiers, so KASAN doesn't know anything about new memory added
by devm_memremap_pages(). This causes a crash when KASAN tries to
access the non-existent shadow memory:
BUG: unable to handle kernel paging request at ffffed0078000000
RIP: 0010:check_memory_region+0x82/0x1e0
Call Trace:
memcpy+0x1f/0x50
pmem_do_bvec+0x163/0x720
pmem_make_request+0x305/0xac0
generic_make_request+0x54f/0xcf0
submit_bio+0x9c/0x370
submit_bh_wbc+0x4c7/0x700
block_read_full_page+0x5ef/0x870
do_read_cache_page+0x2b8/0xb30
read_dev_sector+0xbd/0x3f0
read_lba.isra.0+0x277/0x670
efi_partition+0x41a/0x18f0
check_partition+0x30d/0x5e9
rescan_partitions+0x18c/0x840
__blkdev_get+0x859/0x1060
blkdev_get+0x23f/0x810
__device_add_disk+0x9c8/0xde0
pmem_attach_disk+0x9a8/0xf50
nvdimm_bus_probe+0xf3/0x3c0
driver_probe_device+0x493/0xbd0
bus_for_each_drv+0x118/0x1b0
__device_attach+0x1cd/0x2b0
bus_probe_device+0x1ac/0x260
device_add+0x90d/0x1380
nd_async_device_register+0xe/0x50
async_run_entry_fn+0xc3/0x5d0
process_one_work+0xa0a/0x1810
worker_thread+0x87/0xe80
kthread+0x2d7/0x390
ret_from_fork+0x3a/0x50
Add kasan_add_zero_shadow()/kasan_remove_zero_shadow() - a post-mm_init()
interface to map/unmap kasan_zero_page at requested virtual addresses.
Use it to add/remove the shadow memory for hotplugged/unplugged
device memory.
Reported-by: Dave Chinner <[email protected]>
Signed-off-by: Andrey Ryabinin <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Alexander Potapenko <[email protected]>
---
Changes since v1:
- Compilation fixes.
include/linux/kasan.h | 13 ++-
kernel/memremap.c | 10 ++
mm/kasan/kasan_init.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 325 insertions(+), 14 deletions(-)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index de784fd11d12..46aae129917c 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -20,7 +20,7 @@ extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
extern pud_t kasan_zero_pud[PTRS_PER_PUD];
extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];
-void kasan_populate_zero_shadow(const void *shadow_start,
+int kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end);
static inline void *kasan_mem_to_shadow(const void *addr)
@@ -71,6 +71,9 @@ struct kasan_cache {
int kasan_module_alloc(void *addr, size_t size);
void kasan_free_shadow(const struct vm_struct *vm);
+int kasan_add_zero_shadow(void *start, unsigned long size);
+void kasan_remove_zero_shadow(void *start, unsigned long size);
+
size_t ksize(const void *);
static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
size_t kasan_metadata_size(struct kmem_cache *cache);
@@ -124,6 +127,14 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
static inline void kasan_free_shadow(const struct vm_struct *vm) {}
+static inline int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+ return 0;
+}
+static inline void kasan_remove_zero_shadow(void *start,
+ unsigned long size)
+{}
+
static inline void kasan_unpoison_slab(const void *ptr) { }
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 5857267a4af5..172264bf5812 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/pfn_t.h>
#include <linux/io.h>
+#include <linux/kasan.h>
#include <linux/mm.h>
#include <linux/memory_hotplug.h>
#include <linux/swap.h>
@@ -137,6 +138,7 @@ static void devm_memremap_pages_release(void *data)
mem_hotplug_begin();
arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
&pgmap->altmap : NULL);
+ kasan_remove_zero_shadow(__va(align_start), align_size);
mem_hotplug_done();
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
@@ -223,6 +225,12 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
goto err_pfn_remap;
mem_hotplug_begin();
+ error = kasan_add_zero_shadow(__va(align_start), align_size);
+ if (error) {
+ mem_hotplug_done();
+ goto err_kasan;
+ }
+
error = arch_add_memory(nid, align_start, align_size, altmap, false);
if (!error)
move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
@@ -251,6 +259,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
return __va(res->start);
err_add_memory:
+ kasan_remove_zero_shadow(__va(align_start), align_size);
+ err_kasan:
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
err_pfn_remap:
err_radix:
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index f436246ccc79..7a2a2f13f86f 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -17,10 +17,13 @@
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/pfn.h>
+#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
+#include "kasan.h"
+
/*
* This page serves two purposes:
* - It used as early shadow memory. The entire shadow region populated
@@ -32,22 +35,59 @@ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
#if CONFIG_PGTABLE_LEVELS > 4
p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
+static inline bool kasan_p4d_table(pgd_t pgd)
+{
+ return pgd_page(pgd) == virt_to_page(lm_alias(kasan_zero_p4d));
+}
+#else
+static inline bool kasan_p4d_table(pgd_t pgd)
+{
+ return 0;
+}
#endif
#if CONFIG_PGTABLE_LEVELS > 3
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+ return p4d_page(p4d) == virt_to_page(lm_alias(kasan_zero_pud));
+}
+#else
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+ return 0;
+}
#endif
#if CONFIG_PGTABLE_LEVELS > 2
pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static inline bool kasan_pmd_table(pud_t pud)
+{
+ return pud_page(pud) == virt_to_page(lm_alias(kasan_zero_pmd));
+}
+#else
+static inline bool kasan_pmd_table(pud_t pud)
+{
+ return 0;
+}
#endif
pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss;
+static inline bool kasan_pte_table(pmd_t pmd)
+{
+ return pmd_page(pmd) == virt_to_page(lm_alias(kasan_zero_pte));
+}
+
+static inline bool kasan_zero_page_entry(pte_t pte)
+{
+ return pte_page(pte) == virt_to_page(lm_alias(kasan_zero_page));
+}
+
static __init void *early_alloc(size_t size, int node)
{
return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
BOOTMEM_ALLOC_ACCESSIBLE, node);
}
-static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __ref zero_pte_populate(pmd_t *pmd, unsigned long addr,
unsigned long end)
{
pte_t *pte = pte_offset_kernel(pmd, addr);
@@ -63,7 +103,7 @@ static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
}
}
-static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
+static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
unsigned long end)
{
pmd_t *pmd = pmd_offset(pud, addr);
@@ -78,14 +118,24 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
}
if (pmd_none(*pmd)) {
- pmd_populate_kernel(&init_mm, pmd,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ pte_t *p;
+
+ if (slab_is_available())
+ p = pte_alloc_one_kernel(&init_mm, addr);
+ else
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ if (!p)
+ return -ENOMEM;
+
+ pmd_populate_kernel(&init_mm, pmd, p);
}
zero_pte_populate(pmd, addr, next);
} while (pmd++, addr = next, addr != end);
+
+ return 0;
}
-static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
+static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
unsigned long end)
{
pud_t *pud = pud_offset(p4d, addr);
@@ -103,14 +153,24 @@ static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
}
if (pud_none(*pud)) {
- pud_populate(&init_mm, pud,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ pmd_t *p;
+
+ if (slab_is_available()) {
+ p = pmd_alloc(&init_mm, pud, addr);
+ if (!p)
+ return -ENOMEM;
+ } else {
+ pud_populate(&init_mm, pud,
+ early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ }
}
zero_pmd_populate(pud, addr, next);
} while (pud++, addr = next, addr != end);
+
+ return 0;
}
-static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
+static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
unsigned long end)
{
p4d_t *p4d = p4d_offset(pgd, addr);
@@ -132,11 +192,21 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
}
if (p4d_none(*p4d)) {
- p4d_populate(&init_mm, p4d,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ pud_t *p;
+
+ if (slab_is_available()) {
+ p = pud_alloc(&init_mm, p4d, addr);
+ if (!p)
+ return -ENOMEM;
+ } else {
+ p4d_populate(&init_mm, p4d,
+ early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ }
}
zero_pud_populate(p4d, addr, next);
} while (p4d++, addr = next, addr != end);
+
+ return 0;
}
/**
@@ -145,7 +215,7 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
* @shadow_start - start of the memory range to populate
* @shadow_end - end of the memory range to populate
*/
-void __init kasan_populate_zero_shadow(const void *shadow_start,
+int __ref kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end)
{
unsigned long addr = (unsigned long)shadow_start;
@@ -191,9 +261,229 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
}
if (pgd_none(*pgd)) {
- pgd_populate(&init_mm, pgd,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ p4d_t *p;
+
+ if (slab_is_available()) {
+ p = p4d_alloc(&init_mm, pgd, addr);
+ if (!p)
+ return -ENOMEM;
+ } else {
+ pgd_populate(&init_mm, pgd,
+ early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ }
}
zero_p4d_populate(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
+
+ return 0;
+}
+
+static void kasan_free_pte(pte_t *pte_start, pmd_t *pmd)
+{
+ pte_t *pte;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte = pte_start + i;
+ if (!pte_none(*pte))
+ return;
+ }
+
+ pte_free_kernel(&init_mm, (pte_t *)page_to_virt(pmd_page(*pmd)));
+ pmd_clear(pmd);
+}
+
+static void kasan_free_pmd(pmd_t *pmd_start, pud_t *pud)
+{
+ pmd_t *pmd;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ pmd = pmd_start + i;
+ if (!pmd_none(*pmd))
+ return;
+ }
+
+ pmd_free(&init_mm, (pmd_t *)page_to_virt(pud_page(*pud)));
+ pud_clear(pud);
+}
+
+static void kasan_free_pud(pud_t *pud_start, p4d_t *p4d)
+{
+ pud_t *pud;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ pud = pud_start + i;
+ if (!pud_none(*pud))
+ return;
+ }
+
+ pud_free(&init_mm, (pud_t *)page_to_virt(p4d_page(*p4d)));
+ p4d_clear(p4d);
+}
+
+static void kasan_free_p4d(p4d_t *p4d_start, pgd_t *pgd)
+{
+ p4d_t *p4d;
+ int i;
+
+ for (i = 0; i < PTRS_PER_P4D; i++) {
+ p4d = p4d_start + i;
+ if (!p4d_none(*p4d))
+ return;
+ }
+
+ p4d_free(&init_mm, (p4d_t *)page_to_virt(pgd_page(*pgd)));
+ pgd_clear(pgd);
+}
+
+static void kasan_remove_pte_table(pte_t *pte, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next, pte++) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ if (next > end)
+ next = end;
+
+ if (!pte_present(*pte))
+ continue;
+
+ if (WARN_ON(!kasan_zero_page_entry(*pte)))
+ continue;
+ pte_clear(&init_mm, addr, pte);
+ }
+}
+
+static void kasan_remove_pmd_table(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next, pmd++) {
+ pte_t *pte;
+
+ next = pmd_addr_end(addr, end);
+
+ if (!pmd_present(*pmd))
+ continue;
+
+ if (kasan_pte_table(*pmd)) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE))
+ pmd_clear(pmd);
+ continue;
+ }
+ pte = pte_offset_kernel(pmd, addr);
+ kasan_remove_pte_table(pte, addr, next);
+ kasan_free_pte(pte_offset_kernel(pmd, 0), pmd);
+ }
+}
+
+static void kasan_remove_pud_table(pud_t *pud, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next, pud++) {
+ pmd_t *pmd, *pmd_base;
+
+ next = pud_addr_end(addr, end);
+
+ if (!pud_present(*pud))
+ continue;
+
+ if (kasan_pmd_table(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE))
+ pud_clear(pud);
+ continue;
+ }
+ pmd = pmd_offset(pud, addr);
+ pmd_base = pmd_offset(pud, 0);
+ kasan_remove_pmd_table(pmd, addr, next);
+ kasan_free_pmd(pmd_base, pud);
+ }
+}
+
+static void kasan_remove_p4d_table(p4d_t *p4d, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next, p4d++) {
+ pud_t *pud;
+
+ next = p4d_addr_end(addr, end);
+
+ if (!p4d_present(*p4d))
+ continue;
+
+ if (kasan_pud_table(*p4d)) {
+ if (IS_ALIGNED(addr, P4D_SIZE) &&
+ IS_ALIGNED(next, P4D_SIZE))
+ p4d_clear(p4d);
+ continue;
+ }
+ pud = pud_offset(p4d, addr);
+ kasan_remove_pud_table(pud, addr, next);
+ kasan_free_pud(pud_offset(p4d, 0), p4d);
+ }
+}
+
+void kasan_remove_zero_shadow(void *start, unsigned long size)
+{
+ unsigned long addr, end, next;
+ pgd_t *pgd;
+
+ addr = (unsigned long)kasan_mem_to_shadow(start);
+ end = addr + (size >> KASAN_SHADOW_SCALE_SHIFT);
+
+ if (WARN_ON((unsigned long)start %
+ (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
+ WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+ return;
+
+ for (; addr < end; addr = next) {
+ p4d_t *p4d;
+
+ next = pgd_addr_end(addr, end);
+
+ pgd = pgd_offset_k(addr);
+ if (!pgd_present(*pgd))
+ continue;
+
+ if (kasan_p4d_table(*pgd)) {
+ if (IS_ALIGNED(addr, PGDIR_SIZE) &&
+ IS_ALIGNED(next, PGDIR_SIZE))
+ pgd_clear(pgd);
+ continue;
+ }
+
+ p4d = p4d_offset(pgd, addr);
+ kasan_remove_p4d_table(p4d, addr, next);
+ kasan_free_p4d(p4d_offset(pgd, 0), pgd);
+ }
+}
+
+int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+ int ret;
+ void *shadow_start, *shadow_end;
+
+ shadow_start = kasan_mem_to_shadow(start);
+ shadow_end = shadow_start + (size >> KASAN_SHADOW_SCALE_SHIFT);
+
+ if (WARN_ON((unsigned long)start %
+ (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
+ WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+ return -EINVAL;
+
+ ret = kasan_populate_zero_shadow(shadow_start, shadow_end);
+ if (ret)
+ kasan_remove_zero_shadow(shadow_start,
+ size >> KASAN_SHADOW_SCALE_SHIFT);
+ return ret;
}
--
2.16.4
On Fri, Jun 29, 2018 at 9:49 AM, Andrey Ryabinin
<[email protected]> wrote:
> KASAN learns about hot added memory via the memory hotplug notifier.
> The devm_memremap_pages() intentionally skips calling memory hotplug
> notifiers. So KASAN doesn't know anything about new memory added
> by devm_memremap_pages(). This causes to crash when KASAN tries to
> access non-existent shadow memory:
>
> BUG: unable to handle kernel paging request at ffffed0078000000
> RIP: 0010:check_memory_region+0x82/0x1e0
> Call Trace:
> memcpy+0x1f/0x50
> pmem_do_bvec+0x163/0x720
> pmem_make_request+0x305/0xac0
> generic_make_request+0x54f/0xcf0
> submit_bio+0x9c/0x370
> submit_bh_wbc+0x4c7/0x700
> block_read_full_page+0x5ef/0x870
> do_read_cache_page+0x2b8/0xb30
> read_dev_sector+0xbd/0x3f0
> read_lba.isra.0+0x277/0x670
> efi_partition+0x41a/0x18f0
> check_partition+0x30d/0x5e9
> rescan_partitions+0x18c/0x840
> __blkdev_get+0x859/0x1060
> blkdev_get+0x23f/0x810
> __device_add_disk+0x9c8/0xde0
> pmem_attach_disk+0x9a8/0xf50
> nvdimm_bus_probe+0xf3/0x3c0
> driver_probe_device+0x493/0xbd0
> bus_for_each_drv+0x118/0x1b0
> __device_attach+0x1cd/0x2b0
> bus_probe_device+0x1ac/0x260
> device_add+0x90d/0x1380
> nd_async_device_register+0xe/0x50
> async_run_entry_fn+0xc3/0x5d0
> process_one_work+0xa0a/0x1810
> worker_thread+0x87/0xe80
> kthread+0x2d7/0x390
> ret_from_fork+0x3a/0x50
>
> Add kasan_add_zero_shadow()/kasan_remove_zero_shadow() - post mm_init()
> interface to map/unmap kasan_zero_page at requested virtual addresses.
> And use it to add/remove the shadow memory for hotpluged/unpluged
> device memory.
>
> Reported-by: Dave Chinner <[email protected]>
> Signed-off-by: Andrey Ryabinin <[email protected]>
> Cc: Dan Williams <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Cc: Alexander Potapenko <[email protected]>
Reviewed-and-tested-by: Dan Williams <[email protected]>
On Fri, 29 Jun 2018 19:49:32 +0300 Andrey Ryabinin <[email protected]> wrote:
> KASAN learns about hot added memory via the memory hotplug notifier.
> The devm_memremap_pages() intentionally skips calling memory hotplug
> notifiers.
Why does it do that?
> So KASAN doesn't know anything about new memory added
> by devm_memremap_pages(). This causes to crash when KASAN tries to
> access non-existent shadow memory:
>
> BUG: unable to handle kernel paging request at ffffed0078000000
> RIP: 0010:check_memory_region+0x82/0x1e0
> Call Trace:
> memcpy+0x1f/0x50
> pmem_do_bvec+0x163/0x720
> pmem_make_request+0x305/0xac0
> generic_make_request+0x54f/0xcf0
> submit_bio+0x9c/0x370
> submit_bh_wbc+0x4c7/0x700
> block_read_full_page+0x5ef/0x870
> do_read_cache_page+0x2b8/0xb30
> read_dev_sector+0xbd/0x3f0
> read_lba.isra.0+0x277/0x670
> efi_partition+0x41a/0x18f0
> check_partition+0x30d/0x5e9
> rescan_partitions+0x18c/0x840
> __blkdev_get+0x859/0x1060
> blkdev_get+0x23f/0x810
> __device_add_disk+0x9c8/0xde0
> pmem_attach_disk+0x9a8/0xf50
> nvdimm_bus_probe+0xf3/0x3c0
> driver_probe_device+0x493/0xbd0
> bus_for_each_drv+0x118/0x1b0
> __device_attach+0x1cd/0x2b0
> bus_probe_device+0x1ac/0x260
> device_add+0x90d/0x1380
> nd_async_device_register+0xe/0x50
> async_run_entry_fn+0xc3/0x5d0
> process_one_work+0xa0a/0x1810
> worker_thread+0x87/0xe80
> kthread+0x2d7/0x390
> ret_from_fork+0x3a/0x50
>
> Add kasan_add_zero_shadow()/kasan_remove_zero_shadow() - post mm_init()
> interface to map/unmap kasan_zero_page at requested virtual addresses.
> And use it to add/remove the shadow memory for hotpluged/unpluged
> device memory.
>
> Reported-by: Dave Chinner <[email protected]>
> Signed-off-by: Andrey Ryabinin <[email protected]>
> Cc: Dan Williams <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Cc: Alexander Potapenko <[email protected]>
No cc:stable? Which kernel version(s) do you believe need the fix?
> include/linux/kasan.h | 13 ++-
> kernel/memremap.c | 10 ++
> mm/kasan/kasan_init.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++---
It's a surprisingly large amount of code to do something which KASAN
already does for hotplugged memory. How come?
On Fri, Jun 29, 2018 at 7:33 PM, Andrew Morton
<[email protected]> wrote:
> On Fri, 29 Jun 2018 19:49:32 +0300 Andrey Ryabinin <[email protected]> wrote:
>
>> KASAN learns about hot added memory via the memory hotplug notifier.
>> The devm_memremap_pages() intentionally skips calling memory hotplug
>> notifiers.
>
> Why does it do that?
devm_memremap_pages() deliberately does only half of memory hotplug.
Namely it only adds to the linear map and allocates / initializes
'struct page', but it never onlines the pages, so
devm_memremap_pages() generates none of the events that the hotplug
notifiers would publish.
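Roughly (a simplified sketch, not literal kernel code; the notifier
plumbing and error handling are elided):

	/* Regular hotplug: onlining the pages publishes MEM_GOING_ONLINE,
	 * which is how kasan_mem_notifier() learns to allocate shadow. */
	add_memory(nid, start, size);
	online_pages(...);		/* -> memory_notify(MEM_GOING_ONLINE, ...) */

	/* devm_memremap_pages(): only the first half.  The pages are never
	 * onlined, so no notifier event ever reaches KASAN. */
	arch_add_memory(nid, align_start, align_size, altmap, false);
	move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], ...);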
>> So KASAN doesn't know anything about new memory added
>> by devm_memremap_pages(). This causes to crash when KASAN tries to
>> access non-existent shadow memory:
>>
>> BUG: unable to handle kernel paging request at ffffed0078000000
>> RIP: 0010:check_memory_region+0x82/0x1e0
>> Call Trace:
>> memcpy+0x1f/0x50
>> pmem_do_bvec+0x163/0x720
>> pmem_make_request+0x305/0xac0
>> generic_make_request+0x54f/0xcf0
>> submit_bio+0x9c/0x370
>> submit_bh_wbc+0x4c7/0x700
>> block_read_full_page+0x5ef/0x870
>> do_read_cache_page+0x2b8/0xb30
>> read_dev_sector+0xbd/0x3f0
>> read_lba.isra.0+0x277/0x670
>> efi_partition+0x41a/0x18f0
>> check_partition+0x30d/0x5e9
>> rescan_partitions+0x18c/0x840
>> __blkdev_get+0x859/0x1060
>> blkdev_get+0x23f/0x810
>> __device_add_disk+0x9c8/0xde0
>> pmem_attach_disk+0x9a8/0xf50
>> nvdimm_bus_probe+0xf3/0x3c0
>> driver_probe_device+0x493/0xbd0
>> bus_for_each_drv+0x118/0x1b0
>> __device_attach+0x1cd/0x2b0
>> bus_probe_device+0x1ac/0x260
>> device_add+0x90d/0x1380
>> nd_async_device_register+0xe/0x50
>> async_run_entry_fn+0xc3/0x5d0
>> process_one_work+0xa0a/0x1810
>> worker_thread+0x87/0xe80
>> kthread+0x2d7/0x390
>> ret_from_fork+0x3a/0x50
>>
>> Add kasan_add_zero_shadow()/kasan_remove_zero_shadow() - post mm_init()
>> interface to map/unmap kasan_zero_page at requested virtual addresses.
>> And use it to add/remove the shadow memory for hotpluged/unpluged
>> device memory.
>>
>> Reported-by: Dave Chinner <[email protected]>
>> Signed-off-by: Andrey Ryabinin <[email protected]>
>> Cc: Dan Williams <[email protected]>
>> Cc: Dmitry Vyukov <[email protected]>
>> Cc: Alexander Potapenko <[email protected]>
>
> No cc:stable? Which kernel version(s) do you believe need the fix?
I think devm_memremap_pages() was incompatible with KASAN from the
outset, so I would say:
Fixes: 41e94a851304 ("add devm_memremap_pages")
>
>> include/linux/kasan.h | 13 ++-
>> kernel/memremap.c | 10 ++
>> mm/kasan/kasan_init.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++---
>
> It's a surprisingly large amount of ode to do something which KASAN
> already does for hotplugged memory. How come?
On 06/30/2018 05:33 AM, Andrew Morton wrote:
> On Fri, 29 Jun 2018 19:49:32 +0300 Andrey Ryabinin <[email protected]> wrote:
>
>> KASAN learns about hot added memory via the memory hotplug notifier.
>> The devm_memremap_pages() intentionally skips calling memory hotplug
>> notifiers.
>
> Why does it do that?
>
>> So KASAN doesn't know anything about new memory added
>> by devm_memremap_pages(). This causes to crash when KASAN tries to
>> access non-existent shadow memory:
>>
>> BUG: unable to handle kernel paging request at ffffed0078000000
>> RIP: 0010:check_memory_region+0x82/0x1e0
>> Call Trace:
>> memcpy+0x1f/0x50
>> pmem_do_bvec+0x163/0x720
>> pmem_make_request+0x305/0xac0
>> generic_make_request+0x54f/0xcf0
>> submit_bio+0x9c/0x370
>> submit_bh_wbc+0x4c7/0x700
>> block_read_full_page+0x5ef/0x870
>> do_read_cache_page+0x2b8/0xb30
>> read_dev_sector+0xbd/0x3f0
>> read_lba.isra.0+0x277/0x670
>> efi_partition+0x41a/0x18f0
>> check_partition+0x30d/0x5e9
>> rescan_partitions+0x18c/0x840
>> __blkdev_get+0x859/0x1060
>> blkdev_get+0x23f/0x810
>> __device_add_disk+0x9c8/0xde0
>> pmem_attach_disk+0x9a8/0xf50
>> nvdimm_bus_probe+0xf3/0x3c0
>> driver_probe_device+0x493/0xbd0
>> bus_for_each_drv+0x118/0x1b0
>> __device_attach+0x1cd/0x2b0
>> bus_probe_device+0x1ac/0x260
>> device_add+0x90d/0x1380
>> nd_async_device_register+0xe/0x50
>> async_run_entry_fn+0xc3/0x5d0
>> process_one_work+0xa0a/0x1810
>> worker_thread+0x87/0xe80
>> kthread+0x2d7/0x390
>> ret_from_fork+0x3a/0x50
>>
>> Add kasan_add_zero_shadow()/kasan_remove_zero_shadow() - post mm_init()
>> interface to map/unmap kasan_zero_page at requested virtual addresses.
>> And use it to add/remove the shadow memory for hotpluged/unpluged
>> device memory.
>>
>> Reported-by: Dave Chinner <[email protected]>
>> Signed-off-by: Andrey Ryabinin <[email protected]>
>> Cc: Dan Williams <[email protected]>
>> Cc: Dmitry Vyukov <[email protected]>
>> Cc: Alexander Potapenko <[email protected]>
>
> No cc:stable?
I'm just not sure whether this should go to stable or not.
It's a gray area between new functionality and a bug fix.
From one POV we are fixing a bug here, but on the other hand ZONE_DEVICE and KASAN
never worked together before, so we are also adding new functionality here.
> Which kernel version(s) do you believe need the fix?
I'd say the fix is needed since fa69b5989bb0 ("mm/kasan: add support for memory hotplug").
Before that, the combination ZONE_DEVICE=y and KASAN=y wasn't possible.
>
>> include/linux/kasan.h | 13 ++-
>> kernel/memremap.c | 10 ++
>> mm/kasan/kasan_init.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++---
>
> It's a surprisingly large amount of ode to do something which KASAN
> already does for hotplugged memory. How come?
For hotplugged memory we simply use __vmalloc_node_range()/vfree() to allocate and map the shadow at the desired address.
We could do the same for device memory, but device memory isn't like ordinary memory:
alloc_page() and the slab allocators don't work with it, and there is no concept of
free/allocated device memory - for KASAN it should look like it is always allocated.
That means the shadow of device memory always contains zeroes, so instead of allocating a
bunch of memory just to store zeroes, we map kasan_zero_page.
Most of the code to map kasan_zero_page already exists; this patch makes that code usable after mm_init().
But we didn't have code to unmap kasan_zero_page, so almost all of the newly added code in the patch is there to unmap it
(kasan_remove_zero_shadow()).
It could be possible to not unmap kasan_zero_page, just leave it there after devm_memremap_pages_release().
But we must have some guarantee that after devm_memremap_pages()/devm_memremap_pages_release() the same
addresses can't be reused for ordinary hotpluggable memory.
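To make that concrete, a rough sketch (paraphrasing kasan_mem_notifier()
and zero_pte_populate(); error handling elided):

	/* Ordinary hotplugged memory: the shadow is real, writable memory,
	 * allocated on MEM_GOING_ONLINE and vfree()d on MEM_OFFLINE. */
	shadow_start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(start_pfn));
	shadow_size = (nr_pages << PAGE_SHIFT) >> KASAN_SHADOW_SCALE_SHIFT;
	__vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
			shadow_start + shadow_size, GFP_KERNEL, PAGE_KERNEL,
			VM_NO_GUARD, pfn_to_nid(start_pfn),
			__builtin_return_address(0));

	/* Device memory: every shadow PTE just points to the write-protected
	 * kasan_zero_page, nothing has to be allocated per range. */
	zero_pte = pte_wrprotect(pfn_pte(PFN_DOWN(__pa_symbol(kasan_zero_page)),
					PAGE_KERNEL));
	set_pte_at(&init_mm, addr, pte, zero_pte);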
On Mon, Jul 2, 2018 at 10:22 AM, Andrey Ryabinin
<[email protected]> wrote:
[..]
> It could be possible to not unmap kasan_zero_page, just leave it there after devm_memremap_pages_release().
> But we must have some guarantee that after devm_memremap_pages()/devm_memremap_pages_release() the same
> addresses can't be reused for ordinary hotpluggable memory.
While this does not happen today, we are looking to support it in the
future, i.e. have userspace policy pick whether to access an address
range through a device-file mmap, or treat it as typical memory.