This patch adds verification of userspace memory integrity after S3 resume.
Integrity verification for other memory (e.g., the kernel itself) is already done by tboot.
Signed-off-by: Shane Wang <[email protected]>
Signed-off-by: Joseph Cihula <[email protected]>
diff -r c878d454dc8b arch/x86/kernel/entry_64.S
--- a/arch/x86/kernel/entry_64.S Wed Dec 02 01:06:32 2009 -0800
+++ b/arch/x86/kernel/entry_64.S Thu Dec 03 07:22:17 2009 -0800
@@ -1275,6 +1275,26 @@ ENTRY(call_softirq)
CFI_ENDPROC
END(call_softirq)
+#ifdef CONFIG_INTEL_TXT
+/* void tboot_switch_stack_call(void (*target_func)(void), u64 new_rsp) */
+ENTRY(tboot_switch_stack_call)
+ CFI_STARTPROC
+ push %rbp
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET rbp,0
+ mov %rsp, %rbp
+ CFI_DEF_CFA_REGISTER rbp
+ mov %rsi, %rsp
+ push %rbp
+ call *%rdi
+ leaveq
+ CFI_DEF_CFA_REGISTER rsp
+ CFI_ADJUST_CFA_OFFSET -8
+ ret
+ CFI_ENDPROC
+END(tboot_switch_stack_call)
+#endif
+
#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
diff -r c878d454dc8b arch/x86/kernel/tboot.c
--- a/arch/x86/kernel/tboot.c Wed Dec 02 01:06:32 2009 -0800
+++ b/arch/x86/kernel/tboot.c Thu Dec 03 07:22:17 2009 -0800
@@ -20,6 +20,7 @@
*/
#include <linux/dma_remapping.h>
+#include <linux/scatterlist.h>
#include <linux/init_task.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
@@ -30,12 +31,17 @@
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/tboot.h>
+#include <linux/random.h>
+
+#include <crypto/vmac.h>
+#include <crypto/hash.h>
#include <asm/trampoline.h>
#include <asm/processor.h>
#include <asm/bootparam.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/percpu.h>
#include <asm/fixmap.h>
#include <asm/proto.h>
#include <asm/setup.h>
@@ -168,6 +174,159 @@ static void tboot_create_trampoline(void
map_base, map_size);
}
+#ifdef CONFIG_X86_64
+static char *new_stack, *new_stack_ptr;
+
+static int tboot_pre_stack_switch(void)
+{
+ BUG_ON((new_stack != NULL) || (new_stack_ptr != NULL));
+
+ /*
+ * as long as thread info is above 4G, then switch stack,
+ * since tboot can't access >4G stack for MACing
+ */
+ if (!((PFN_PHYS(PFN_DOWN(virt_to_phys(current_thread_info())))
+ + (PFN_UP(THREAD_SIZE) << PAGE_SHIFT))
+ & 0xffffffff00000000UL))
+ return -1;
+
+ new_stack = (char *)__get_free_pages(GFP_DMA32, IRQ_STACK_ORDER);
+
+ BUG_ON(new_stack == NULL);
+ memset(new_stack, 0, IRQ_STACK_SIZE);
+ new_stack_ptr = new_stack + IRQ_STACK_SIZE - 64;
+
+ return 0;
+}
+
+static void tboot_post_stack_switch(void)
+{
+ BUG_ON((new_stack == NULL) || (new_stack_ptr == NULL));
+
+ free_pages((unsigned long)new_stack, IRQ_STACK_ORDER);
+ new_stack = NULL;
+ new_stack_ptr = NULL;
+}
+
+extern void tboot_switch_stack_call(void (*target_func)(void), u64 new_rsp);
+
+#else /* CONFIG_X86_32 */
+
+#define tboot_pre_stack_switch() (-1)
+#define tboot_post_stack_switch() do { } while (0)
+#define tboot_switch_stack_call(target_func, new_rsp) do { } while (0)
+
+#endif
+
+static vmac_t mem_mac;
+static struct crypto_hash *tfm;
+
+static int tboot_gen_mem_integrity(const uint8_t key[], vmac_t *mac)
+{
+ int i, j, ret;
+ pg_data_t *pgdat;
+ struct hash_desc desc;
+ struct scatterlist sg[1];
+ struct page *page;
+ uint64_t paddr, rstart, rend;
+ unsigned long pfn;
+ uint8_t zeroed_key[VMAC_KEY_LEN];
+
+ if (!tfm)
+ tfm = crypto_alloc_hash("vmac(aes)", 0, CRYPTO_ALG_ASYNC);
+
+ if (IS_ERR(tfm)) {
+ tfm = NULL;
+ return -ENOMEM;
+ }
+
+ desc.tfm = tfm;
+ desc.flags = 0;
+
+ sg_init_table(sg, 1);
+
+ ret = crypto_hash_init(&desc);
+ if (ret)
+ return ret;
+ ret = crypto_hash_setkey(desc.tfm, key, VMAC_KEY_LEN);
+ if (ret)
+ return ret;
+
+ for_each_online_pgdat(pgdat) {
+ unsigned long flags;
+
+ pgdat_resize_lock(pgdat, &flags);
+ for (i = 0, pfn = pgdat->node_start_pfn;
+ i < pgdat->node_spanned_pages;
+ i++, pfn = pgdat->node_start_pfn + i) {
+
+ if (!pfn_valid(pfn) || !page_is_ram(pfn))
+ continue;
+
+ page = pfn_to_page(pfn);
+ paddr = page_to_phys(page);
+
+ /* If pg will be MACed by tboot, no need to MAC here */
+ for (j = 0; j < tboot->num_mac_regions; j++) {
+ rstart = tboot->mac_regions[j].start;
+ rend = rstart + tboot->mac_regions[j].size;
+ if (((paddr + PAGE_SIZE) <= rstart)
+ || (rend <= paddr))
+ continue;
+ break;
+ }
+
+ if (j == tboot->num_mac_regions) {
+ sg_set_page(sg, page, PAGE_SIZE, 0);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * check if the page we are going to MAC is marked as
+ * present in the kernel page tables.
+ */
+ if (!kernel_page_present(page)) {
+ kernel_map_pages(page, 1, 1);
+ ret = crypto_hash_update(&desc, sg, PAGE_SIZE);
+ kernel_map_pages(page, 1, 0);
+ } else
+#endif
+ ret = crypto_hash_update(&desc, sg, PAGE_SIZE);
+ if (ret) {
+ pgdat_resize_unlock(pgdat, &flags);
+ return ret;
+ }
+ }
+ }
+ pgdat_resize_unlock(pgdat, &flags);
+ }
+
+#ifdef CONFIG_X86_64
+ /*
+ * for stack > 4G, we should MAC the stack in the kernel after switch,
+ * for stack < 4G, the stack is MACed by tboot
+ */
+ if (new_stack) {
+ for (i = 0, page = virt_to_page((void *)current_thread_info());
+ i < (1 << THREAD_ORDER);
+ i++, page++) {
+ sg_set_page(sg, page, PAGE_SIZE, 0);
+ ret = crypto_hash_update(&desc, sg, PAGE_SIZE);
+ if (ret)
+ return ret;
+ }
+ }
+#endif
+
+ ret = crypto_hash_final(&desc, (uint8_t *)mac);
+ if (ret)
+ return ret;
+
+ /* Clean the key */
+ memset(zeroed_key, 0, sizeof(zeroed_key));
+ crypto_hash_setkey(desc.tfm, zeroed_key, VMAC_KEY_LEN);
+
+ return 0;
+}
+
#ifdef CONFIG_ACPI_SLEEP
static void add_mac_region(phys_addr_t start, unsigned long size)
@@ -196,6 +355,27 @@ static int tboot_setup_sleep(void)
/* kernel code + data + bss */
add_mac_region(virt_to_phys(_text), _end - _text);
+
+ /* stack */
+#ifdef CONFIG_X86_64
+ /*
+ * if stack > 4G, we should MAC the stack in the kernel after switch,
+ * if stack < 4G, the stack is MACed by tboot
+ */
+ if (new_stack)
+ add_mac_region(virt_to_phys(new_stack),
+ IRQ_STACK_SIZE); /* > 4G */
+ else
+#endif
+ add_mac_region(virt_to_phys(current_thread_info()),
+ THREAD_SIZE); /* < 4G */
+
+ /* MAC userspace memory not handled by tboot */
+ get_random_bytes(tboot->s3_key, sizeof(tboot->s3_key));
+ if (tboot_gen_mem_integrity(tboot->s3_key, &mem_mac)) {
+ panic("tboot: vmac generation failed\n");
+ return -1;
+ }
tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
@@ -292,6 +472,52 @@ void tboot_sleep(u8 sleep_state, u32 pm1
}
tboot_shutdown(acpi_shutdown_map[sleep_state]);
+}
+
+static void tboot_sx_resume(void)
+{
+ vmac_t mac;
+
+ if (tboot_gen_mem_integrity(tboot->s3_key, &mac))
+ panic("tboot: vmac generation failed\n");
+ else if (mac != mem_mac)
+#ifdef CONFIG_DEBUG_KERNEL
+ pr_debug("tboot: memory integrity %llx -> %llx\n",
+ mem_mac, mac);
+#else
+ panic("tboot: memory integrity was lost on resume\n");
+#endif
+ else
+ pr_info("memory integrity OK\n");
+
+ /* Clean s3_key */
+ memset(tboot->s3_key, 0, sizeof(tboot->s3_key));
+}
+
+extern void do_suspend_lowlevel(void);
+
+static void tboot_do_suspend_lowlevel_call(void)
+{
+ do_suspend_lowlevel();
+ tboot_sx_resume();
+}
+
+void tboot_do_suspend_lowlevel(void)
+{
+ int ret = -1;
+
+ if (!tboot_enabled()) {
+ do_suspend_lowlevel();
+ return;
+ }
+
+ ret = tboot_pre_stack_switch();
+ if (!ret) {
+ tboot_switch_stack_call(tboot_do_suspend_lowlevel_call,
+ (u64)new_stack_ptr);
+ tboot_post_stack_switch();
+ } else
+ tboot_do_suspend_lowlevel_call();
}
static atomic_t ap_wfs_count;
diff -r c878d454dc8b drivers/acpi/sleep.c
--- a/drivers/acpi/sleep.c Wed Dec 02 01:06:32 2009 -0800
+++ b/drivers/acpi/sleep.c Thu Dec 03 07:22:17 2009 -0800
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/suspend.h>
#include <linux/reboot.h>
+#include <linux/tboot.h>
#include <asm/io.h>
@@ -244,7 +245,7 @@ static int acpi_suspend_enter(suspend_st
break;
case ACPI_STATE_S3:
- do_suspend_lowlevel();
+ tboot_do_suspend_lowlevel();
break;
}
diff -r c878d454dc8b include/linux/mm.h
--- a/include/linux/mm.h Wed Dec 02 01:06:32 2009 -0800
+++ b/include/linux/mm.h Thu Dec 03 07:22:17 2009 -0800
@@ -1263,18 +1263,18 @@ static inline void enable_debug_pageallo
{
debug_pagealloc_enabled = 1;
}
-#ifdef CONFIG_HIBERNATION
+#if defined(CONFIG_HIBERNATION) || defined(CONFIG_INTEL_TXT)
extern bool kernel_page_present(struct page *page);
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATION || CONFIG_INTEL_TXT */
#else
static inline void
kernel_map_pages(struct page *page, int numpages, int enable) {}
static inline void enable_debug_pagealloc(void)
{
}
-#ifdef CONFIG_HIBERNATION
+#if defined(CONFIG_HIBERNATION) || defined(CONFIG_INTEL_TXT)
static inline bool kernel_page_present(struct page *page) { return true; }
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATION || CONFIG_INTEL_TXT */
#endif
extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
diff -r c878d454dc8b include/linux/tboot.h
--- a/include/linux/tboot.h Wed Dec 02 01:06:32 2009 -0800
+++ b/include/linux/tboot.h Thu Dec 03 07:22:17 2009 -0800
@@ -147,7 +147,7 @@ extern struct acpi_table_header *tboot_g
extern struct acpi_table_header *tboot_get_dmar_table(
struct acpi_table_header *dmar_tbl);
extern int tboot_force_iommu(void);
-
+extern void tboot_do_suspend_lowlevel(void);
#else
#define tboot_probe() do { } while (0)
@@ -156,6 +156,7 @@ extern int tboot_force_iommu(void);
do { } while (0)
#define tboot_get_dmar_table(dmar_tbl) (dmar_tbl)
#define tboot_force_iommu() 0
+#define tboot_do_suspend_lowlevel() do_suspend_lowlevel()
#endif /* !CONFIG_INTEL_TXT */
diff -r c878d454dc8b security/Kconfig
--- a/security/Kconfig Wed Dec 02 01:06:32 2009 -0800
+++ b/security/Kconfig Thu Dec 03 07:22:17 2009 -0800
@@ -116,6 +116,8 @@ config INTEL_TXT
config INTEL_TXT
bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)"
depends on HAVE_INTEL_TXT
+ select CRYPTO_VMAC
+ select CRYPTO_AES
help
This option enables support for booting the kernel with the
Trusted Boot (tboot) module. This will utilize
On Fri, Dec 04, 2009 at 05:12:11PM +0800, Shane Wang wrote:
> diff -r c878d454dc8b arch/x86/kernel/entry_64.S
> --- a/arch/x86/kernel/entry_64.S Wed Dec 02 01:06:32 2009 -0800
> +++ b/arch/x86/kernel/entry_64.S Thu Dec 03 07:22:17 2009 -0800
> @@ -1275,6 +1275,26 @@ ENTRY(call_softirq)
> CFI_ENDPROC
> END(call_softirq)
>
> +#ifdef CONFIG_INTEL_TXT
> +/* void tboot_switch_stack_call(void (*target_func)(void), u64 new_rsp) */
> +ENTRY(tboot_switch_stack_call)
I would drop the tboot_ prefix; stack switching can be useful for other
things too.
> + CFI_STARTPROC
> + push %rbp
> + CFI_ADJUST_CFA_OFFSET 8
> + CFI_REL_OFFSET rbp,0
> + mov %rsp, %rbp
> + CFI_DEF_CFA_REGISTER rbp
Did you verify that the dwarf2 really works and gdb can backtrace through
it?
> +{
> + BUG_ON((new_stack != NULL) || (new_stack_ptr != NULL));
> +
> + /*
> + * as long as thread info is above 4G, then switch stack,
> + * since tboot can't access >4G stack for MACing
> + */
> + if (!((PFN_PHYS(PFN_DOWN(virt_to_phys(current_thread_info())))
> + + (PFN_UP(THREAD_SIZE) << PAGE_SHIFT))
> + & 0xffffffff00000000UL))
> + return -1;
All the PFN_*s seem somewhat redundant; it would be easier to keep
everything shifted up in the first place.
> +
> + new_stack = (char *)__get_free_pages(GFP_DMA32, IRQ_STACK_ORDER);
> +
> + BUG_ON(new_stack == NULL);
__GFP_REPEAT at least? BUG_ON is a nasty way to handle out of memory.
> + memset(new_stack, 0, IRQ_STACK_SIZE);
GFP_ZERO
> + new_stack_ptr = new_stack + IRQ_STACK_SIZE - 64;
Why - 64?
> +
> + return 0;
> +}
> +
> +static void tboot_post_stack_switch(void)
> +{
> + BUG_ON((new_stack == NULL) || (new_stack_ptr == NULL));
> +
> + free_pages((unsigned long)new_stack, IRQ_STACK_ORDER);
> + new_stack = NULL;
> + new_stack_ptr = NULL;
> +}
> +
> +extern void tboot_switch_stack_call(void (*target_func)(void), u64
> new_rsp);
Typically those should be in some header.
> + struct page *page;
> + uint64_t paddr, rstart, rend;
> + unsigned long pfn;
> + uint8_t zeroed_key[VMAC_KEY_LEN];
> +
> + if (!tfm)
> + tfm = crypto_alloc_hash("vmac(aes)", 0, CRYPTO_ALG_ASYNC);
> +
> + if (IS_ERR(tfm)) {
> + tfm = NULL;
> + return -ENOMEM;
> + }
> +
> + desc.tfm = tfm;
> + desc.flags = 0;
> +
> + sg_init_table(sg, 1);
> +
> + ret = crypto_hash_init(&desc);
> + if (ret)
> + return ret;
> + ret = crypto_hash_setkey(desc.tfm, key, VMAC_KEY_LEN);
> + if (ret)
> + return ret;
> +
> + for_each_online_pgdat(pgdat) {
No locking against memory hotplug? Even if user space is still down,
doing that would be safer.
> + unsigned long flags;
> +
> + pgdat_resize_lock(pgdat, &flags);
> + for (i = 0, pfn = pgdat->node_start_pfn;
> + i < pgdat->node_spanned_pages;
> + i++, pfn = pgdat->node_start_pfn + i) {
> +
> + if (!pfn_valid(pfn) || !page_is_ram(pfn))
> + continue;
You probably should consider a faster way to skip holes; doing
them piece by piece might well be a performance problem on very
holey systems. In particular, page_is_ram() is quite slow.
> + || (rend <= paddr))
> + continue;
> + break;
> + }
> +
> + if (j == tboot->num_mac_regions) {
> + sg_set_page(sg, page, PAGE_SIZE, 0);
> +#ifdef CONFIG_DEBUG_PAGEALLOC
> + /*
> + * check if the page we are going to MAC is marked as
> + * present in the kernel page tables.
> + */
> + if (!kernel_page_present(page)) {
> + kernel_map_pages(page, 1, 1);
> + ret = crypto_hash_update(&desc, sg,
> PAGE_SIZE);
> + kernel_map_pages(page, 1, 0);
Nasty, might be racy -- better use a separate mapping in this case.
> +#ifdef CONFIG_X86_64
> + /*
> + * for stack > 4G, we should MAC the stack in the kernel after
> switch,
> + * for stack < 4G, the stack is MACed by tboot
> + */
This special case seems quite ugly, with all its ifdefs and lots
of special code.
Can't you just MAC the stack always? Shouldn't be that expensive.
> + else
> +#endif
> + add_mac_region(virt_to_phys(current_thread_info()),
> + THREAD_SIZE); /* < 4G */
> +
> + /* MAC userspace memory not handled by tboot */
> + get_random_bytes(tboot->s3_key, sizeof(tboot->s3_key));
That's early in boot, isn't it? It's quite doubtful you'll get
anything even vaguely random out of get_random_bytes() at this point —
maybe not even the time.
> }
>
> tboot_shutdown(acpi_shutdown_map[sleep_state]);
> +}
> +
> +static void tboot_sx_resume(void)
> +{
> + vmac_t mac;
> +
> + if (tboot_gen_mem_integrity(tboot->s3_key, &mac))
> + panic("tboot: vmac generation failed\n");
> + else if (mac != mem_mac)
> +#ifdef CONFIG_DEBUG_KERNEL
> + pr_debug("tboot: memory integrity %llx -> %llx\n",
> + mem_mac, mac);
> +#else
> + panic("tboot: memory integrity was lost on resume\n");
> +#endif
I don't think DEBUG_KERNEL is supposed to be used like this; a separate
option would probably be better, if this makes sense at all.
Typical problem with panics at this point: is anything even visible
on the screen?
>
> +#ifdef CONFIG_INTEL_TXT
> +/* void tboot_switch_stack_call(void (*target_func)(void), u64 new_rsp) */
> +ENTRY(tboot_switch_stack_call)
Hmm, I thought I had seen that earlier already — is it a copy?
BTW, there's no reason all of this has to be in entry*.S; that
file is already quite crowded.
Is the patch duplicated here?
-Andi