Move tboot.h from asm to linux to fix the build errors of intel_txt patch on non-X86 platforms. Remove the tboot code from generic code init/main.c and kernel/cpu.c.
Signed-off-by: Shane Wang <[email protected]>
diff -r c6f74b152a32 arch/x86/Kconfig
--- a/arch/x86/Kconfig Tue Sep 01 07:24:42 2009 -0700
+++ b/arch/x86/Kconfig Tue Sep 01 07:28:21 2009 -0700
@@ -179,6 +179,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
+
+config HAVE_INTEL_TXT
+ def_bool y
+ depends on EXPERIMENTAL && DMAR && ACPI
# Use the generic interrupt handling code in kernel/irq/:
config GENERIC_HARDIRQS
diff -r c6f74b152a32 arch/x86/include/asm/tboot.h
--- a/arch/x86/include/asm/tboot.h Tue Sep 01 07:24:42 2009 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,197 +0,0 @@
-/*
- * tboot.h: shared data structure with tboot and kernel and functions
- * used by kernel for runtime support of Intel(R) Trusted
- * Execution Technology
- *
- * Copyright (c) 2006-2009, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- */
-
-#ifndef _ASM_TBOOT_H
-#define _ASM_TBOOT_H
-
-#include <acpi/acpi.h>
-
-/* these must have the values from 0-5 in this order */
-enum {
- TB_SHUTDOWN_REBOOT = 0,
- TB_SHUTDOWN_S5,
- TB_SHUTDOWN_S4,
- TB_SHUTDOWN_S3,
- TB_SHUTDOWN_HALT,
- TB_SHUTDOWN_WFS
-};
-
-#ifdef CONFIG_INTEL_TXT
-
-/* used to communicate between tboot and the launched kernel */
-
-#define TB_KEY_SIZE 64 /* 512 bits */
-
-#define MAX_TB_MAC_REGIONS 32
-
-struct tboot_mac_region {
- u64 start; /* must be 64 byte -aligned */
- u32 size; /* must be 64 byte -granular */
-} __packed;
-
-/* GAS - Generic Address Structure (ACPI 2.0+) */
-struct tboot_acpi_generic_address {
- u8 space_id;
- u8 bit_width;
- u8 bit_offset;
- u8 access_width;
- u64 address;
-} __packed;
-
-/*
- * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec
- * (http://www.acpi.info/)
- */
-struct tboot_acpi_sleep_info {
- struct tboot_acpi_generic_address pm1a_cnt_blk;
- struct tboot_acpi_generic_address pm1b_cnt_blk;
- struct tboot_acpi_generic_address pm1a_evt_blk;
- struct tboot_acpi_generic_address pm1b_evt_blk;
- u16 pm1a_cnt_val;
- u16 pm1b_cnt_val;
- u64 wakeup_vector;
- u32 vector_width;
- u64 kernel_s3_resume_vector;
-} __packed;
-
-/*
- * shared memory page used for communication between tboot and kernel
- */
-struct tboot {
- /*
- * version 3+ fields:
- */
-
- /* TBOOT_UUID */
- u8 uuid[16];
-
- /* version number: 5 is current */
- u32 version;
-
- /* physical addr of tb_log_t log */
- u32 log_addr;
-
- /*
- * physical addr of entry point for tboot shutdown and
- * type of shutdown (TB_SHUTDOWN_*) being requested
- */
- u32 shutdown_entry;
- u32 shutdown_type;
-
- /* kernel-specified ACPI info for Sx shutdown */
- struct tboot_acpi_sleep_info acpi_sinfo;
-
- /* tboot location in memory (physical) */
- u32 tboot_base;
- u32 tboot_size;
-
- /* memory regions (phys addrs) for tboot to MAC on S3 */
- u8 num_mac_regions;
- struct tboot_mac_region mac_regions[MAX_TB_MAC_REGIONS];
-
-
- /*
- * version 4+ fields:
- */
-
- /* symmetric key for use by kernel; will be encrypted on S3 */
- u8 s3_key[TB_KEY_SIZE];
-
-
- /*
- * version 5+ fields:
- */
-
- /* used to 4byte-align num_in_wfs */
- u8 reserved_align[3];
-
- /* number of processors in wait-for-SIPI */
- u32 num_in_wfs;
-} __packed;
-
-/*
- * UUID for tboot data struct to facilitate matching
- * defined as {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} by tboot, which is
- * represented as {} in the char array used here
- */
-#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\
- 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8}
-
-extern struct tboot *tboot;
-
-static inline int tboot_enabled(void)
-{
- return tboot != NULL;
-}
-
-extern void tboot_probe(void);
-extern void tboot_create_trampoline(void);
-extern void tboot_shutdown(u32 shutdown_type);
-extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control);
-extern int tboot_wait_for_aps(int num_aps);
-extern struct acpi_table_header *tboot_get_dmar_table(
- struct acpi_table_header *dmar_tbl);
-extern int tboot_force_iommu(void);
-
-#else /* CONFIG_INTEL_TXT */
-
-static inline int tboot_enabled(void)
-{
- return 0;
-}
-
-static inline void tboot_probe(void)
-{
-}
-
-static inline void tboot_create_trampoline(void)
-{
-}
-
-static inline void tboot_shutdown(u32 shutdown_type)
-{
-}
-
-static inline void tboot_sleep(u8 sleep_state, u32 pm1a_control,
- u32 pm1b_control)
-{
-}
-
-static inline int tboot_wait_for_aps(int num_aps)
-{
- return 0;
-}
-
-static inline struct acpi_table_header *tboot_get_dmar_table(
- struct acpi_table_header *dmar_tbl)
-{
- return dmar_tbl;
-}
-
-static inline int tboot_force_iommu(void)
-{
- return 0;
-}
-
-#endif /* !CONFIG_INTEL_TXT */
-
-#endif /* _ASM_TBOOT_H */
diff -r c6f74b152a32 arch/x86/kernel/reboot.c
--- a/arch/x86/kernel/reboot.c Tue Sep 01 07:24:42 2009 -0700
+++ b/arch/x86/kernel/reboot.c Tue Sep 01 07:28:21 2009 -0700
@@ -4,6 +4,7 @@
#include <linux/pm.h>
#include <linux/efi.h>
#include <linux/dmi.h>
+#include <linux/tboot.h>
#include <acpi/reboot.h>
#include <asm/io.h>
#include <asm/apic.h>
@@ -23,8 +24,6 @@
#else
# include <asm/iommu.h>
#endif
-
-#include <asm/tboot.h>
/*
* Power off function, if any
diff -r c6f74b152a32 arch/x86/kernel/setup.c
--- a/arch/x86/kernel/setup.c Tue Sep 01 07:24:42 2009 -0700
+++ b/arch/x86/kernel/setup.c Tue Sep 01 07:28:21 2009 -0700
@@ -66,6 +66,7 @@
#include <linux/percpu.h>
#include <linux/crash_dump.h>
+#include <linux/tboot.h>
#include <video/edid.h>
@@ -140,8 +141,6 @@ struct boot_params __initdata boot_param
#else
struct boot_params boot_params;
#endif
-
-#include <asm/tboot.h>
/*
* Machine setup..
diff -r c6f74b152a32 arch/x86/kernel/smpboot.c
--- a/arch/x86/kernel/smpboot.c Tue Sep 01 07:24:42 2009 -0700
+++ b/arch/x86/kernel/smpboot.c Tue Sep 01 07:28:21 2009 -0700
@@ -47,6 +47,7 @@
#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/nmi.h>
+#include <linux/tboot.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -62,7 +63,6 @@
#include <asm/vmi.h>
#include <asm/apic.h>
#include <asm/setup.h>
-#include <asm/tboot.h>
#include <asm/uv/uv.h>
#include <asm/debugreg.h>
#include <linux/mc146818rtc.h>
diff -r c6f74b152a32 arch/x86/kernel/tboot.c
--- a/arch/x86/kernel/tboot.c Tue Sep 01 07:24:42 2009 -0700
+++ b/arch/x86/kernel/tboot.c Tue Sep 01 07:28:21 2009 -0700
@@ -22,11 +22,14 @@
#include <linux/dma_remapping.h>
#include <linux/init_task.h>
#include <linux/spinlock.h>
+#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/dmar.h>
+#include <linux/cpu.h>
#include <linux/pfn.h>
#include <linux/mm.h>
+#include <linux/tboot.h>
#include <asm/trampoline.h>
#include <asm/processor.h>
@@ -36,7 +39,6 @@
#include <asm/fixmap.h>
#include <asm/proto.h>
#include <asm/setup.h>
-#include <asm/tboot.h>
#include <asm/e820.h>
#include <asm/io.h>
@@ -154,12 +156,9 @@ static int map_tboot_pages(unsigned long
return 0;
}
-void tboot_create_trampoline(void)
+static void tboot_create_trampoline(void)
{
u32 map_base, map_size;
-
- if (!tboot_enabled())
- return;
/* Create identity map for tboot shutdown code. */
map_base = PFN_DOWN(tboot->tboot_base);
@@ -295,20 +294,57 @@ void tboot_sleep(u8 sleep_state, u32 pm1
tboot_shutdown(acpi_shutdown_map[sleep_state]);
}
-int tboot_wait_for_aps(int num_aps)
+static atomic_t ap_wfs_count;
+
+static int tboot_wait_for_aps(int num_aps)
{
unsigned long timeout;
+ timeout = AP_WAIT_TIMEOUT*HZ;
+ while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps &&
+ timeout) {
+ mdelay(1);
+ timeout--;
+ }
+
+ if (timeout)
+ pr_warning("tboot wait for APs timeout\n");
+
+ return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
+}
+
+static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ switch (action) {
+ case CPU_DYING:
+ atomic_inc(&ap_wfs_count);
+ if (num_online_cpus() == 1)
+ if (tboot_wait_for_aps(atomic_read(&ap_wfs_count)))
+ return NOTIFY_BAD;
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block tboot_cpu_notifier __cpuinitdata =
+{
+ .notifier_call = tboot_cpu_callback,
+};
+
+static __init int tboot_late_init(void)
+{
if (!tboot_enabled())
return 0;
- timeout = jiffies + AP_WAIT_TIMEOUT*HZ;
- while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps &&
- time_before(jiffies, timeout))
- cpu_relax();
+ tboot_create_trampoline();
- return time_before(jiffies, timeout) ? 0 : 1;
+ atomic_set(&ap_wfs_count, 0);
+ register_hotcpu_notifier(&tboot_cpu_notifier);
+ return 0;
}
+
+late_initcall(tboot_late_init);
/*
* TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE)
diff -r c6f74b152a32 drivers/acpi/acpica/hwsleep.c
--- a/drivers/acpi/acpica/hwsleep.c Tue Sep 01 07:24:42 2009 -0700
+++ b/drivers/acpi/acpica/hwsleep.c Tue Sep 01 07:28:21 2009 -0700
@@ -45,7 +45,7 @@
#include <acpi/acpi.h>
#include "accommon.h"
#include "actables.h"
-#include <asm/tboot.h>
+#include <linux/tboot.h>
#define _COMPONENT ACPI_HARDWARE
ACPI_MODULE_NAME("hwsleep")
diff -r c6f74b152a32 drivers/pci/dmar.c
--- a/drivers/pci/dmar.c Tue Sep 01 07:24:42 2009 -0700
+++ b/drivers/pci/dmar.c Tue Sep 01 07:28:21 2009 -0700
@@ -33,7 +33,7 @@
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
-#include <asm/tboot.h>
+#include <linux/tboot.h>
#undef PREFIX
#define PREFIX "DMAR:"
diff -r c6f74b152a32 drivers/pci/intel-iommu.c
--- a/drivers/pci/intel-iommu.c Tue Sep 01 07:24:42 2009 -0700
+++ b/drivers/pci/intel-iommu.c Tue Sep 01 07:28:21 2009 -0700
@@ -37,8 +37,8 @@
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/sysdev.h>
+#include <linux/tboot.h>
#include <asm/cacheflush.h>
-#include <asm/tboot.h>
#include <asm/iommu.h>
#include "pci.h"
diff -r c6f74b152a32 include/linux/tboot.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/include/linux/tboot.h Tue Sep 01 07:28:21 2009 -0700
@@ -0,0 +1,162 @@
+/*
+ * tboot.h: shared data structure with tboot and kernel and functions
+ * used by kernel for runtime support of Intel(R) Trusted
+ * Execution Technology
+ *
+ * Copyright (c) 2006-2009, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef _LINUX_TBOOT_H
+#define _LINUX_TBOOT_H
+
+/* these must have the values from 0-5 in this order */
+enum {
+ TB_SHUTDOWN_REBOOT = 0,
+ TB_SHUTDOWN_S5,
+ TB_SHUTDOWN_S4,
+ TB_SHUTDOWN_S3,
+ TB_SHUTDOWN_HALT,
+ TB_SHUTDOWN_WFS
+};
+
+#ifdef CONFIG_INTEL_TXT
+#include <acpi/acpi.h>
+/* used to communicate between tboot and the launched kernel */
+
+#define TB_KEY_SIZE 64 /* 512 bits */
+
+#define MAX_TB_MAC_REGIONS 32
+
+struct tboot_mac_region {
+ u64 start; /* must be 64 byte -aligned */
+ u32 size; /* must be 64 byte -granular */
+} __packed;
+
+/* GAS - Generic Address Structure (ACPI 2.0+) */
+struct tboot_acpi_generic_address {
+ u8 space_id;
+ u8 bit_width;
+ u8 bit_offset;
+ u8 access_width;
+ u64 address;
+} __packed;
+
+/*
+ * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec
+ * (http://www.acpi.info/)
+ */
+struct tboot_acpi_sleep_info {
+ struct tboot_acpi_generic_address pm1a_cnt_blk;
+ struct tboot_acpi_generic_address pm1b_cnt_blk;
+ struct tboot_acpi_generic_address pm1a_evt_blk;
+ struct tboot_acpi_generic_address pm1b_evt_blk;
+ u16 pm1a_cnt_val;
+ u16 pm1b_cnt_val;
+ u64 wakeup_vector;
+ u32 vector_width;
+ u64 kernel_s3_resume_vector;
+} __packed;
+
+/*
+ * shared memory page used for communication between tboot and kernel
+ */
+struct tboot {
+ /*
+ * version 3+ fields:
+ */
+
+ /* TBOOT_UUID */
+ u8 uuid[16];
+
+ /* version number: 5 is current */
+ u32 version;
+
+ /* physical addr of tb_log_t log */
+ u32 log_addr;
+
+ /*
+ * physical addr of entry point for tboot shutdown and
+ * type of shutdown (TB_SHUTDOWN_*) being requested
+ */
+ u32 shutdown_entry;
+ u32 shutdown_type;
+
+ /* kernel-specified ACPI info for Sx shutdown */
+ struct tboot_acpi_sleep_info acpi_sinfo;
+
+ /* tboot location in memory (physical) */
+ u32 tboot_base;
+ u32 tboot_size;
+
+ /* memory regions (phys addrs) for tboot to MAC on S3 */
+ u8 num_mac_regions;
+ struct tboot_mac_region mac_regions[MAX_TB_MAC_REGIONS];
+
+
+ /*
+ * version 4+ fields:
+ */
+
+ /* symmetric key for use by kernel; will be encrypted on S3 */
+ u8 s3_key[TB_KEY_SIZE];
+
+
+ /*
+ * version 5+ fields:
+ */
+
+ /* used to 4byte-align num_in_wfs */
+ u8 reserved_align[3];
+
+ /* number of processors in wait-for-SIPI */
+ u32 num_in_wfs;
+} __packed;
+
+/*
+ * UUID for tboot data struct to facilitate matching
+ * defined as {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} by tboot, which is
+ * represented as {} in the char array used here
+ */
+#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\
+ 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8}
+
+extern struct tboot *tboot;
+
+static inline int tboot_enabled(void)
+{
+ return tboot != NULL;
+}
+
+extern void tboot_probe(void);
+extern void tboot_shutdown(u32 shutdown_type);
+extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control);
+extern struct acpi_table_header *tboot_get_dmar_table(
+ struct acpi_table_header *dmar_tbl);
+extern int tboot_force_iommu(void);
+
+#else
+
+#define tboot_probe() do { } while (0)
+#define tboot_shutdown(shutdown_type) do { } while (0)
+#define tboot_sleep(sleep_state, pm1a_control, pm1b_control) \
+ do { } while (0)
+#define tboot_get_dmar_table(dmar_tbl) (dmar_tbl)
+#define tboot_force_iommu() 0
+
+#endif /* !CONFIG_INTEL_TXT */
+
+#endif /* _LINUX_TBOOT_H */
diff -r c6f74b152a32 init/main.c
--- a/init/main.c Tue Sep 01 07:24:42 2009 -0700
+++ b/init/main.c Tue Sep 01 07:28:21 2009 -0700
@@ -73,7 +73,6 @@
#include <asm/io.h>
#include <asm/bugs.h>
#include <asm/setup.h>
-#include <asm/tboot.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
@@ -716,8 +715,6 @@ asmlinkage void __init start_kernel(void
ftrace_init();
- tboot_create_trampoline();
-
/* Do the rest non-__init'ed, we're now alive */
rest_init();
}
diff -r c6f74b152a32 kernel/cpu.c
--- a/kernel/cpu.c Tue Sep 01 07:24:42 2009 -0700
+++ b/kernel/cpu.c Tue Sep 01 07:28:21 2009 -0700
@@ -14,7 +14,6 @@
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
-#include <asm/tboot.h>
#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
@@ -377,7 +376,7 @@ static cpumask_var_t frozen_cpus;
int disable_nonboot_cpus(void)
{
- int cpu, first_cpu, error, num_cpus = 0;
+ int cpu, first_cpu, error;
error = stop_machine_create();
if (error)
@@ -392,7 +391,6 @@ int disable_nonboot_cpus(void)
for_each_online_cpu(cpu) {
if (cpu == first_cpu)
continue;
- num_cpus++;
error = _cpu_down(cpu, 1);
if (!error) {
cpumask_set_cpu(cpu, frozen_cpus);
@@ -403,8 +401,6 @@ int disable_nonboot_cpus(void)
break;
}
}
- /* ensure all CPUs have gone into wait-for-SIPI */
- error |= tboot_wait_for_aps(num_cpus);
if (!error) {
BUG_ON(num_online_cpus() > 1);
diff -r c6f74b152a32 security/Kconfig
--- a/security/Kconfig Tue Sep 01 07:24:42 2009 -0700
+++ b/security/Kconfig Tue Sep 01 07:28:21 2009 -0700
@@ -131,7 +131,7 @@ config LSM_MMAP_MIN_ADDR
config INTEL_TXT
bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)"
- depends on EXPERIMENTAL && X86 && DMAR && ACPI
+ depends on HAVE_INTEL_TXT
help
This option enables support for booting the kernel with the
Trusted Boot (tboot) module. This will utilize
This patch added verification for userspace memory integrity after s3 resume.
Integrity verification for other memory (say kernel itself) has been done by tboot.
Signed-off-by: Shane Wang <[email protected]>
Signed-off-by: Joseph Cihula <[email protected]>
diff -r 42870e183bd5 arch/x86/kernel/tboot.c
--- a/arch/x86/kernel/tboot.c Fri Sep 25 06:06:39 2009 -0400
+++ b/arch/x86/kernel/tboot.c Fri Sep 25 11:03:04 2009 -0400
@@ -20,6 +20,7 @@
*/
#include <linux/dma_remapping.h>
+#include <linux/scatterlist.h>
#include <linux/init_task.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
@@ -30,12 +31,17 @@
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/tboot.h>
+#include <linux/random.h>
+
+#include <crypto/vmac.h>
+#include <crypto/hash.h>
#include <asm/trampoline.h>
#include <asm/processor.h>
#include <asm/bootparam.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/percpu.h>
#include <asm/fixmap.h>
#include <asm/proto.h>
#include <asm/setup.h>
@@ -168,6 +174,80 @@ static void tboot_create_trampoline(void
map_base, map_size);
}
+static vmac_t mem_mac;
+static struct crypto_hash *tfm;
+
+static int tboot_gen_mem_integrity(const uint8_t key[], vmac_t *mac)
+{
+ int i, j, ret;
+ pg_data_t *pgdat;
+ struct hash_desc desc;
+ struct scatterlist sg[1];
+ struct page *page;
+ uint64_t paddr, rstart, rend;
+ unsigned long pfn;
+ uint8_t zeroed_key[VMAC_KEY_LEN];
+
+ if (!tfm)
+ tfm = crypto_alloc_hash("vmac(aes)", 0, CRYPTO_ALG_ASYNC);
+
+ if (IS_ERR(tfm)) {
+ tfm = NULL;
+ return -ENOMEM;
+ }
+
+ desc.tfm = tfm;
+ desc.flags = 0;
+
+ sg_init_table(sg, 1);
+
+ ret = crypto_hash_init(&desc);
+ if (ret)
+ return ret;
+ ret = crypto_hash_setkey(desc.tfm, key, VMAC_KEY_LEN);
+ if (ret)
+ return ret;
+
+ for_each_online_pgdat(pgdat) {
+ for (i = 0, pfn = pgdat->node_start_pfn;
+ i < pgdat->node_spanned_pages;
+ i++, pfn = pgdat->node_start_pfn + i) {
+
+ if (!pfn_valid(pfn) || !page_is_ram(pfn))
+ continue;
+
+ page = pfn_to_page(pfn);
+ paddr = page_to_phys(page);
+
+ /* If pg will be MACed by tboot, no need to MAC here */
+ for (j = 0; j < tboot->num_mac_regions; j++) {
+ rstart = tboot->mac_regions[j].start;
+ rend = rstart + tboot->mac_regions[j].size;
+ if (((paddr + PAGE_SIZE) <= rstart)
+ || (rend <= paddr))
+ continue;
+ break;
+ }
+
+ if (j == tboot->num_mac_regions) {
+ sg_set_page(sg, page, PAGE_SIZE, 0);
+ ret = crypto_hash_update(&desc, sg, PAGE_SIZE);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ ret = crypto_hash_final(&desc, (uint8_t *)mac);
+ if (ret)
+ return ret;
+
+ /* Clean the key */
+ memset(zeroed_key, 0, sizeof(zeroed_key));
+ crypto_hash_setkey(desc.tfm, zeroed_key, VMAC_KEY_LEN);
+
+ return 0;
+}
+
#ifdef CONFIG_ACPI_SLEEP
static void add_mac_region(phys_addr_t start, unsigned long size)
@@ -197,6 +277,16 @@ static int tboot_setup_sleep(void)
/* kernel code + data + bss */
add_mac_region(virt_to_phys(_text), _end - _text);
+ /* stack */
+ add_mac_region(virt_to_phys(current_thread_info()), THREAD_SIZE);
+
+ /* MAC userspace memory not handled by tboot */
+ get_random_bytes(tboot->s3_key, sizeof(tboot->s3_key));
+ if (tboot_gen_mem_integrity(tboot->s3_key, &mem_mac)) {
+ panic("tboot: vmac generation failed\n");
+ return -1;
+ }
+
tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
return 0;
@@ -212,6 +302,68 @@ static int tboot_setup_sleep(void)
}
#endif
+
+static struct thread_info *low_ti, *current_ti;
+
+static void tboot_do_stack_switch(struct thread_info *new_ti,
+ struct thread_info *old_ti)
+{
+ memcpy(new_ti, old_ti, THREAD_SIZE);
+#ifdef CONFIG_X86_32
+ asm volatile (
+ " and %0, %%esp; "
+ " add %1, %%esp; "
+ : : "i" (THREAD_SIZE - 1), "r" (new_ti));
+#else
+ asm volatile (
+ " and %0, %%rsp; "
+ " add %1, %%rsp; "
+ : : "i" (THREAD_SIZE - 1), "r" (new_ti));
+ percpu_write(kernel_stack, (unsigned long)new_ti -
+ KERNEL_STACK_OFFSET + THREAD_SIZE);
+#endif
+ current->stack = new_ti;
+}
+
+void tboot_switch_stack(void)
+{
+ if (!tboot_enabled())
+ return;
+
+ current_ti = current_thread_info();
+
+ /* If thread info is above 4G, then switch stack */
+ if (!((PFN_PHYS(PFN_DOWN(virt_to_phys(current_ti)))
+ + (PFN_UP(THREAD_SIZE) << PAGE_SHIFT))
+ & 0xffffffff00000000ULL))
+ return;
+
+ if (low_ti == NULL)
+ low_ti = (struct thread_info *)
+ __get_free_pages(GFP_DMA32, THREAD_ORDER);
+
+ tboot_do_stack_switch(low_ti, current_ti);
+}
+
+void tboot_restore_stack(void)
+{
+ if (!tboot_enabled())
+ return;
+
+ if (current_ti == NULL)
+ BUG();
+
+ /* If thread info is above 4G, then restore stack */
+ if (!((PFN_PHYS(PFN_DOWN(virt_to_phys(current_ti)))
+ + (PFN_UP(THREAD_SIZE) << PAGE_SHIFT))
+ & 0xffffffff00000000ULL))
+ return;
+
+ if (low_ti == NULL)
+ BUG();
+
+ tboot_do_stack_switch(current_ti, low_ti);
+}
void tboot_shutdown(u32 shutdown_type)
{
@@ -292,6 +444,24 @@ void tboot_sleep(u8 sleep_state, u32 pm1
}
tboot_shutdown(acpi_shutdown_map[sleep_state]);
+}
+
+void tboot_sx_resume(void)
+{
+ vmac_t mac;
+
+ if (!tboot_enabled())
+ return;
+
+ if (tboot_gen_mem_integrity(tboot->s3_key, &mac))
+ panic("tboot: vmac generation failed\n");
+ else if (mac != mem_mac)
+ panic("tboot: memory integrity was lost on resume\n");
+ else
+ pr_info("memory integrity OK\n");
+
+ /* Clean s3_key */
+ memset(tboot->s3_key, 0, sizeof(tboot->s3_key));
}
static atomic_t ap_wfs_count;
diff -r 42870e183bd5 drivers/acpi/sleep.c
--- a/drivers/acpi/sleep.c Fri Sep 25 06:06:39 2009 -0400
+++ b/drivers/acpi/sleep.c Fri Sep 25 11:03:04 2009 -0400
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/suspend.h>
#include <linux/reboot.h>
+#include <linux/tboot.h>
#include <asm/io.h>
@@ -244,7 +245,10 @@ static int acpi_suspend_enter(suspend_st
break;
case ACPI_STATE_S3:
+ tboot_switch_stack();
do_suspend_lowlevel();
+ tboot_sx_resume();
+ tboot_restore_stack();
break;
}
diff -r 42870e183bd5 include/linux/tboot.h
--- a/include/linux/tboot.h Fri Sep 25 06:06:39 2009 -0400
+++ b/include/linux/tboot.h Fri Sep 25 11:03:04 2009 -0400
@@ -147,7 +147,9 @@ extern struct acpi_table_header *tboot_g
extern struct acpi_table_header *tboot_get_dmar_table(
struct acpi_table_header *dmar_tbl);
extern int tboot_force_iommu(void);
-
+extern void tboot_sx_resume(void);
+extern void tboot_switch_stack(void);
+extern void tboot_restore_stack(void);
#else
#define tboot_probe() do { } while (0)
@@ -156,6 +158,9 @@ extern int tboot_force_iommu(void);
do { } while (0)
#define tboot_get_dmar_table(dmar_tbl) (dmar_tbl)
#define tboot_force_iommu() 0
+#define tboot_sx_resume() do { } while (0)
+#define tboot_switch_stack() do { } while (0)
+#define tboot_restore_stack() do { } while (0)
#endif /* !CONFIG_INTEL_TXT */
diff -r 42870e183bd5 security/Kconfig
--- a/security/Kconfig Fri Sep 25 06:06:39 2009 -0400
+++ b/security/Kconfig Fri Sep 25 11:03:04 2009 -0400
@@ -116,6 +116,8 @@ config INTEL_TXT
config INTEL_TXT
bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)"
depends on HAVE_INTEL_TXT
+ select CRYPTO_VMAC
+ select CRYPTO_AES
help
This option enables support for booting the kernel with the
Trusted Boot (tboot) module. This will utilize
This patch based on an original version by H. Peter Anvin fixed the buggy timeout warning logic in tboot.
Signed-off-by: Shane Wang <[email protected]>
diff -r 0edd117ada44 arch/x86/kernel/tboot.c
--- a/arch/x86/kernel/tboot.c Wed Sep 23 10:06:56 2009 -0700
+++ b/arch/x86/kernel/tboot.c Mon Sep 28 07:42:18 2009 -0700
@@ -301,16 +301,17 @@ static int tboot_wait_for_aps(int num_ap
unsigned long timeout;
timeout = AP_WAIT_TIMEOUT*HZ;
- while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps &&
- timeout) {
+ while (timeout) {
+ if (atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps)
+ break;
mdelay(1);
timeout--;
}
- if (timeout)
+ if (!timeout)
pr_warning("tboot wait for APs timeout\n");
- return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
+ return !timeout;
}
static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb,
On Sun 2009-09-27 17:07:28, Shane Wang wrote:
> This patch added verification for userspace memory integrity after s3 resume.
> Integrity verification for other memory (say kernel itself) has been done by tboot.
>
AFAICT, it verifies userspace _and_ kernel memory, that's why it does
magic stack switching. Why not verify everything in tboot?
Is kernel<->tboot abi described somewhere?
> @@ -168,6 +174,80 @@ static void tboot_create_trampoline(void
> map_base, map_size);
> }
>
> +static vmac_t mem_mac;
> +static struct crypto_hash *tfm;
Could these be automatic?
> + for_each_online_pgdat(pgdat) {
> + for (i = 0, pfn = pgdat->node_start_pfn;
> + i < pgdat->node_spanned_pages;
> + i++, pfn = pgdat->node_start_pfn + i) {
> +
> + if (!pfn_valid(pfn) || !page_is_ram(pfn))
> + continue;
> +
> + page = pfn_to_page(pfn);
> + paddr = page_to_phys(page);
> +
> + /* If pg will be MACed by tboot, no need to MAC here */
> + for (j = 0; j < tboot->num_mac_regions; j++) {
> + rstart = tboot->mac_regions[j].start;
> + rend = rstart + tboot->mac_regions[j].size;
> + if (((paddr + PAGE_SIZE) <= rstart)
> + || (rend <= paddr))
> + continue;
> + break;
> + }
> +
> + if (j == tboot->num_mac_regions) {
> + sg_set_page(sg, page, PAGE_SIZE, 0);
> + ret = crypto_hash_update(&desc, sg, PAGE_SIZE);
> + if (ret)
> + return ret;
> + }
> + }
> + }
This looks over all memory, afaict. Does it correctly handle highmem?
> @@ -197,6 +277,16 @@ static int tboot_setup_sleep(void)
> /* kernel code + data + bss */
> add_mac_region(virt_to_phys(_text), _end - _text);
>
> + /* stack */
> + add_mac_region(virt_to_phys(current_thread_info()), THREAD_SIZE);
> +
> + /* MAC userspace memory not handled by tboot */
> + get_random_bytes(tboot->s3_key, sizeof(tboot->s3_key));
> + if (tboot_gen_mem_integrity(tboot->s3_key, &mem_mac)) {
> + panic("tboot: vmac generation failed\n");
> + return -1;
> + }
> +
> tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
>
> return 0;
> @@ -212,6 +302,68 @@ static int tboot_setup_sleep(void)
> }
>
> #endif
> +
> +static struct thread_info *low_ti, *current_ti;
> +
> +static void tboot_do_stack_switch(struct thread_info *new_ti,
> + struct thread_info *old_ti)
> +{
> + memcpy(new_ti, old_ti, THREAD_SIZE);
Memcpy does some rather odd stuff with fpu in some
configs. Like... you may get copy of thread info with fpu marked
enabled, while it really is disabled when memcpy exits.
> +#ifdef CONFIG_X86_32
> + asm volatile (
> + " and %0, %%esp; "
> + " add %1, %%esp; "
> + : : "i" (THREAD_SIZE - 1), "r" (new_ti));
> +#else
> + asm volatile (
> + " and %0, %%rsp; "
> + " add %1, %%rsp; "
> + : : "i" (THREAD_SIZE - 1), "r" (new_ti));
> + percpu_write(kernel_stack, (unsigned long)new_ti -
> + KERNEL_STACK_OFFSET + THREAD_SIZE);
> +#endif
> + current->stack = new_ti;
> +}
This is pretty subtle&fragile -- see memcpy above. Why is it needed?
> + current_ti = current_thread_info();
> +
> + /* If thread info is above 4G, then switch stack */
> + if (!((PFN_PHYS(PFN_DOWN(virt_to_phys(current_ti)))
> + + (PFN_UP(THREAD_SIZE) << PAGE_SHIFT))
> + & 0xffffffff00000000ULL))
> + return;
How can thread info be above 4G on 32-bit?
Why does 4G limit matter on 64-bit?
> +void tboot_sx_resume(void)
> +{
> + vmac_t mac;
> +
> + if (!tboot_enabled())
> + return;
> +
> + if (tboot_gen_mem_integrity(tboot->s3_key, &mac))
> + panic("tboot: vmac generation failed\n");
> + else if (mac != mem_mac)
> + panic("tboot: memory integrity was lost on resume\n");
> + else
> + pr_info("memory integrity OK\n");
So I corrupt memory, but also corrupt tboot_enabled() to return 0....
And... does panic kill the machine quickly enough that no 'bad stuff'
happens? (Whats bad stuff in this context, anyway?).
> @@ -244,7 +245,10 @@ static int acpi_suspend_enter(suspend_st
> break;
>
> case ACPI_STATE_S3:
> + tboot_switch_stack();
> do_suspend_lowlevel();
> + tboot_sx_resume();
> + tboot_restore_stack();
> break;
> }
>
Did you audit all code before sx_resume()? If it trusts data not
checksummed by tboot, attacker may be able to hijack code execution
and bypass your protection, no?
What about S1?
And what about hibernation, btw?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
> So I corrupt memory, but also corrupt tboot_enabled() to return 0....
>
> And... does panic kill the machine quickly enough that no 'bad stuff'
> happens? (Whats bad stuff in this context, anyway?).
panic has notifiers, so indeed someone could put arbitary code into panic.
-Andi
--
[email protected] -- Speaking for myself only.
part of answers.
Pavel Machek wrote:
> On Sun 2009-09-27 17:07:28, Shane Wang wrote:
>> This patch added verification for userspace memory integrity after
>> s3 resume. Integrity verification for other memory (say kernel
>> itself) has been done by tboot.
>>
>
> AFAICT, it verifies userspace _and_ kernel memory, that's why it does
> magic stack switching. Why not verify everything in tboot?
Because tboot only can access <4G mem and the memory is sparse.
Tboot likes to MAC the continuous mem.
>
> Is kernel<->tboot abi described somewhere?
>
>> @@ -168,6 +174,80 @@ static void tboot_create_trampoline(void
>> map_base, map_size); }
>>
>> +static vmac_t mem_mac;
>> +static struct crypto_hash *tfm;
>
> Could these be automatic?
We don't wish the memory is changing when MACing, including the static variables.
>> +void tboot_sx_resume(void)
>> +{
>> + vmac_t mac;
>> +
>> + if (!tboot_enabled())
>> + return;
>> +
>> + if (tboot_gen_mem_integrity(tboot->s3_key, &mac))
>> + panic("tboot: vmac generation failed\n");
>> + else if (mac != mem_mac)
>> + panic("tboot: memory integrity was lost on resume\n"); + else
>> + pr_info("memory integrity OK\n");
>
> So I corrupt memory, but also corrupt tboot_enabled() to return 0....
You corrupt the memory and tboot_enabled(). tboot MACing will find it.
> And... does panic kill the machine quickly enough that no 'bad stuff'
> happens? (Whats bad stuff in this context, anyway?).
Do you have some suggestions on it?
Shane-
Hi!
Please wrap mails at column 72 (or so).
> > AFAICT, it verifies userspace _and_ kernel memory, that's why it does
> > magic stack switching. Why not verify everything in tboot?
> Because tboot only can access <4G mem without paging. And the memory is sparse. We can't/needn't set unlimited sparse mem ranges to the MAC array with limited elements in the shared page, in order to pass the parameters.
> On the other hand, it is reasonable for tboot to verify kernel, and kernel to verify userspace memory.
>
Are you sure x86-64 kernel & modules is always below 4GB? I don't
think so.
> >> +static vmac_t mem_mac;
> >> +static struct crypto_hash *tfm;
> >
> > Could these be automatic?
> Maybe, but I don't wish other files can access the variables and take tfm as an example, I'd like to allocate memory to it once and then initialize it once in order to avoid impact of memory change to MACing.
>
You use stack, anyway.
> > Why does 4G limit matter on 64-bit?
> tboot can't access >4G, see above.
Too bad, then its broken by design.
> >> + if (tboot_gen_mem_integrity(tboot->s3_key, &mac))
> >> + panic("tboot: vmac generation failed\n");
> >> + else if (mac != mem_mac)
> >> + panic("tboot: memory integrity was lost on resume\n"); + else
> >> + pr_info("memory integrity OK\n");
> >
> > So I corrupt memory, but also corrupt tboot_enabled() to return 0....
> >
> > And... does panic kill the machine quickly enough that no 'bad stuff'
> > happens? (Whats bad stuff in this context, anyway?).
I'd really like you to answer that.
> >> @@ -244,7 +245,10 @@ static int acpi_suspend_enter(suspend_st
> >> break;
> >>
> >> case ACPI_STATE_S3:
> >> + tboot_switch_stack();
> >> do_suspend_lowlevel();
> >> + tboot_sx_resume();
> >> + tboot_restore_stack();
> >> break;
> >> }
> >>
> >
> > Did you audit all code before sx_resume()? If it trusts data not
> > checksummed by tboot, attacker may be able to hijack code execution
> > and bypass your protection, no?
> Yes, kernel code is audited by tboot before resume.
So no, you did not audit do_suspend_lowlevel to make sure it does not
follow function pointers. Bad.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
> This patch added verification for userspace memory integrity after
> S3 resume.
It does not work.
> Integrity verification for other memory (say kernel itself) has been done by tboot.
>
Not true. Kernel uses memory above 4G on x86-64. Including... say
console writing functions.
You can patch holes, but without description 'what does this protect
against' it is almost impossible to evaluate.
> +void tboot_do_suspend_lowlevel(void)
> +{
> + int ret = -1;
> +
> + if (!tboot_enabled()) {
> + do_suspend_lowlevel();
> + return;
> + }
> +
> + ret = tboot_pre_stack_switch();
> + if (!ret) {
> + tboot_switch_stack_call(tboot_do_suspend_lowlevel_call,
> + (u64)new_stack_ptr);
...and here you add requirements to suspend_lowlevel that were not
there before. ("May not act on unchecksummed memory"), without
documenting them.
NAK.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
(Sorry for so late response)
Thanks.
Shane
Pavel Machek wrote:
> On Sun 2009-09-27 17:07:28, Shane Wang wrote:
>> This patch added verification for userspace memory integrity after
>> s3 resume. Integrity verification for other memory (say kernel
>> itself) has been done by tboot.
>>
>
> AFAICT, it verifies userspace _and_ kernel memory, that's why it does
> magic stack switching. Why not verify everything in tboot?
Because tboot only can access <4G mem without paging. And the memory is sparse. We can't/needn't set unlimited sparse mem ranges to the MAC array with limited elements in the shared page, in order to pass the parameters.
On the other hand, it is reasonable for tboot to verify kernel, and kernel to verify userspace memory.
>> @@ -168,6 +174,80 @@ static void tboot_create_trampoline(void
>> map_base, map_size); }
>>
>> +static vmac_t mem_mac;
>> +static struct crypto_hash *tfm;
>
> Could these be automatic?
Maybe, but I don't wish other files can access the variables and take tfm as an example, I'd like to allocate memory to it once and then initialize it once in order to avoid impact of memory change to MACing.
>
>> + for_each_online_pgdat(pgdat) {
>> + for (i = 0, pfn = pgdat->node_start_pfn;
>> + i < pgdat->node_spanned_pages;
>> + i++, pfn = pgdat->node_start_pfn + i) {
>> +
>> + if (!pfn_valid(pfn) || !page_is_ram(pfn))
>> + continue;
>> +
>> + page = pfn_to_page(pfn);
>> + paddr = page_to_phys(page);
>> +
>> + /* If pg will be MACed by tboot, no need to MAC here */
>> + for (j = 0; j < tboot->num_mac_regions; j++) {
>> + rstart = tboot->mac_regions[j].start;
>> + rend = rstart + tboot->mac_regions[j].size;
>> + if (((paddr + PAGE_SIZE) <= rstart)
>> + || (rend <= paddr))
>> + continue;
>> + break;
>> + }
>> +
>> + if (j == tboot->num_mac_regions) {
>> + sg_set_page(sg, page, PAGE_SIZE, 0);
>> + ret = crypto_hash_update(&desc, sg, PAGE_SIZE); + if (ret)
>> + return ret;
>> + }
>> + }
>> + }
>
> This looks over all memory, afaict. Does it correctly handle highmem?
It looks over all online memory, and should handle highmem since highmem has been added into the online nodes.
refer to show_mem() in lib/show_mem.c.
>
>> @@ -197,6 +277,16 @@ static int tboot_setup_sleep(void)
>> /* kernel code + data + bss */
>> add_mac_region(virt_to_phys(_text), _end - _text);
>>
>> + /* stack */
>> + add_mac_region(virt_to_phys(current_thread_info()), THREAD_SIZE); +
>> + /* MAC userspace memory not handled by tboot */
>> + get_random_bytes(tboot->s3_key, sizeof(tboot->s3_key));
>> + if (tboot_gen_mem_integrity(tboot->s3_key, &mem_mac)) {
>> + panic("tboot: vmac generation failed\n");
>> + return -1;
>> + }
>> +
>> tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
>>
>> return 0;
>> @@ -212,6 +302,68 @@ static int tboot_setup_sleep(void) }
>>
>> #endif
>> +
>> +static struct thread_info *low_ti, *current_ti;
>> +
>> +static void tboot_do_stack_switch(struct thread_info *new_ti,
>> + struct thread_info *old_ti)
>> +{
>> + memcpy(new_ti, old_ti, THREAD_SIZE);
>
> Memcpy does some rather odd stuff with fpu in some
> configs. Like... you may get copy of thread info with fpu marked
> enabled, while it really is disabled when memcpy exits.
We gave up thread_info but stack, like interrupt stack and call.
>
>> +#ifdef CONFIG_X86_32
>> + asm volatile (
>> + " and %0, %%esp; "
>> + " add %1, %%esp; "
>> + : : "i" (THREAD_SIZE - 1), "r" (new_ti));
>> +#else
>> + asm volatile (
>> + " and %0, %%rsp; "
>> + " add %1, %%rsp; "
>> + : : "i" (THREAD_SIZE - 1), "r" (new_ti));
>> + percpu_write(kernel_stack, (unsigned long)new_ti -
>> + KERNEL_STACK_OFFSET + THREAD_SIZE);
>> +#endif
>> + current->stack = new_ti;
>> +}
>
> This is pretty subtle&fragile -- see memcpy above. Why is it needed?
Gave it up.
>
>> + current_ti = current_thread_info();
>> +
>> + /* If thread info is above 4G, then switch stack */
>> + if (!((PFN_PHYS(PFN_DOWN(virt_to_phys(current_ti)))
>> + + (PFN_UP(THREAD_SIZE) << PAGE_SHIFT))
>> + & 0xffffffff00000000ULL))
>> + return;
>
> How can thread info be above 4G on 32-bit?
OK, I fixed it.
>
> Why does 4G limit matter on 64-bit?
tboot can't access >4G, see above.
>
>> +void tboot_sx_resume(void)
>> +{
>> + vmac_t mac;
>> +
>> + if (!tboot_enabled())
>> + return;
>> +
>> + if (tboot_gen_mem_integrity(tboot->s3_key, &mac))
>> + panic("tboot: vmac generation failed\n");
>> + else if (mac != mem_mac)
>> + panic("tboot: memory integrity was lost on resume\n"); + else
>> + pr_info("memory integrity OK\n");
>
> So I corrupt memory, but also corrupt tboot_enabled() to return 0....
>
> And... does panic kill the machine quickly enough that no 'bad stuff'
> happens? (Whats bad stuff in this context, anyway?).
>
>> @@ -244,7 +245,10 @@ static int acpi_suspend_enter(suspend_st
>> break;
>>
>> case ACPI_STATE_S3:
>> + tboot_switch_stack();
>> do_suspend_lowlevel();
>> + tboot_sx_resume();
>> + tboot_restore_stack();
>> break;
>> }
>>
>
> Did you audit all code before sx_resume()? If it trusts data not
> checksummed by tboot, attacker may be able to hijack code execution
> and bypass your protection, no?
Yes, kernel code is audited by tboot before resume.
>
> What about S1?
>
> And what about hibernation, btw
No need memory MACing for S1/S4, it is another way in the tboot code.
> Pavel
> From: Pavel Machek [mailto:[email protected]]
> Sent: Friday, December 04, 2009 12:20 AM
>
> Hi!
>
> Please wrap mails at column 72 (or so).
>
> > > AFAICT, it verifies userspace _and_ kernel memory, that's why it does
> > > magic stack switching. Why not verify everything in tboot?
> > Because tboot only can access <4G mem without paging. And the memory is sparse. We
> can't/needn't set unlimited sparse mem ranges to the MAC array with limited elements in the
> shared page, in order to pass the parameters.
> > On the other hand, it is reasonable for tboot to verify kernel, and kernel to verify
> userspace memory.
> >
>
> Are you sure x86-64 kernel & modules is always below 4GB? I don't
> think so.
The only part of the kernel that really needs to be below 4GB is what is used by the code on resume to do the MAC verification of the remainder of the memory. This is all static code and variables. The regions we pass to tboot for MAC'ing are:
S3 real mode resume code: acpi_wakeup_address, WAKEUP_SIZE
AP trampoline code: virt_to_phys(trampoline_base), TRAMPOLINE_SIZE
kernel static code and data: virt_to_phys(_text), _end - _text
the stack we use (either new one or existing depending--see code):
virt_to_phys(new_stack), IRQ_STACK_SIZE
virt_to_phys(current_thread_info()), THREAD_SIZE
Can you post a .config and platform configuration for which the tboot, MAC, or do_suspend_lowlevel() are not within the regions above? We'll be happy to address the issue once we understand the conditions.
> > >> +static vmac_t mem_mac;
> > >> +static struct crypto_hash *tfm;
> > >
> > > Could these be automatic?
> > Maybe, but I don't wish other files can access the variables and take tfm as an example, I'd
> like to allocate memory to it once and then initialize it once in order to avoid impact of
> memory change to MACing.
> >
>
> You use stack, anyway.
Do you mean as static local variables? If so, I'm not sure if that would be any better.
> > > Why does 4G limit matter on 64-bit?
> > tboot can't access >4G, see above.
>
> Too bad, then its broken by design.
See above.
> > >> + if (tboot_gen_mem_integrity(tboot->s3_key, &mac))
> > >> + panic("tboot: vmac generation failed\n");
> > >> + else if (mac != mem_mac)
> > >> + panic("tboot: memory integrity was lost on resume\n"); + else
> > >> + pr_info("memory integrity OK\n");
> > >
> > > So I corrupt memory, but also corrupt tboot_enabled() to return 0....
> > >
> > > And... does panic kill the machine quickly enough that no 'bad stuff'
> > > happens? (Whats bad stuff in this context, anyway?).
>
> I'd really like you to answer that.
"bad stuff" would be the execution of any code (or use of any data that affects execution) that was not verified by tboot. As long as panic() is within the code ranges MAC'ed by tboot (see above), it would be covered. Do you know of some panic() code paths that are outside of this?
> > >> @@ -244,7 +245,10 @@ static int acpi_suspend_enter(suspend_st
> > >> break;
> > >>
> > >> case ACPI_STATE_S3:
> > >> + tboot_switch_stack();
> > >> do_suspend_lowlevel();
> > >> + tboot_sx_resume();
> > >> + tboot_restore_stack();
> > >> break;
> > >> }
> > >>
> > >
> > > Did you audit all code before sx_resume()? If it trusts data not
> > > checksummed by tboot, attacker may be able to hijack code execution
> > > and bypass your protection, no?
> > Yes, kernel code is audited by tboot before resume.
>
> So no, you did not audit do_suspend_lowlevel to make sure it does not
> follow function pointers. Bad.
We aren't aware of any code or data used by the resume path that is outside of the tboot-MAC'ed regions above--if you can point out any then we will gladly address them.
> Pavel
> --
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
> From: Pavel Machek [mailto:[email protected]]
> Sent: Friday, December 04, 2009 12:29 AM
>
> > This patch added verification for userspace memory integrity after
> > S3 resume.
>
> It does not work.
>
> > Integrity verification for other memory (say kernel itself) has been done by tboot.
> >
>
> Not true. Kernel uses memory above 4G on x86-64. Including... say
> console writing functions.
I've responded to that in your previous email thread, so let's keep the technical discussion of what is MAC'ed to just that one thread.
> You can patch holes, but without description 'what does this protect
> against' it is almost impossible to evaluate.
>
>
> > +void tboot_do_suspend_lowlevel(void)
> > +{
> > + int ret = -1;
> > +
> > + if (!tboot_enabled()) {
> > + do_suspend_lowlevel();
> > + return;
> > + }
> > +
> > + ret = tboot_pre_stack_switch();
> > + if (!ret) {
> > + tboot_switch_stack_call(tboot_do_suspend_lowlevel_call,
> > + (u64)new_stack_ptr);
>
> ...and here you add requirements to suspend_lowlevel that were not
> there before. ("May not act on unchecksummed memory"), without
> documenting them.
Really the only requirement is (as discussed in the previous thread) that it only use code and data within [_text, _end - _text]--do you think that this really needs to be called out? do_suspend_lowlevel() is a very simple function that has just the one (resume path) call to another simple function--it doesn't seem likely that it would violate this.
>
> NAK.
> Pavel
>
> --
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
> "bad stuff" would be the execution of any code (or use of any data that affects execution) that was not verified by tboot. As long as panic() is within the code ranges MAC'ed by tboot (see above), it would be covered. Do you know of some panic() code paths that are outside of this?
Not code path, but the code called by panic (console drivers, debuggers etc.)
can well use data that is stored >4GB
This can include structures with indirect pointers, like notifier chains.
Notifier chains have a special checker than can check
for <4GB, but there are other call vectors too.
> > > > checksummed by tboot, attacker may be able to hijack code execution
> > > > and bypass your protection, no?
> > > Yes, kernel code is audited by tboot before resume.
> >
> > So no, you did not audit do_suspend_lowlevel to make sure it does not
> > follow function pointers. Bad.
>
> We aren't aware of any code or data used by the resume path that is outside of the tboot-MAC'ed regions above--if you can point out any then we will gladly address them.
Code coverage is not enough, you need data coverage too. If someone
modifies kernel data it's typically easy to subvert code as a next step.
-Andi
--
[email protected] -- Speaking for myself only.
> From: Andi Kleen [mailto:[email protected]]
> Sent: Friday, December 04, 2009 9:14 AM
>
> > "bad stuff" would be the execution of any code (or use of any data that affects execution)
> that was not verified by tboot. As long as panic() is within the code ranges MAC'ed by tboot
> (see above), it would be covered. Do you know of some panic() code paths that are outside of
> this?
>
> Not code path, but the code called by panic (console drivers, debuggers etc.)
> can well use data that is stored >4GB
>
> This can include structures with indirect pointers, like notifier chains.
>
> Notifier chains have a special checker than can check
> for <4GB, but there are other call vectors too.
Since, as you pointed out in a previous email, it is doubtful that there will be any user-visible output at this point, we can change this path to a tboot reset (which will give us some serial output at least). Is it going to be similarly unsafe to do a printk()?
> > > > > checksummed by tboot, attacker may be able to hijack code execution
> > > > > and bypass your protection, no?
> > > > Yes, kernel code is audited by tboot before resume.
> > >
> > > So no, you did not audit do_suspend_lowlevel to make sure it does not
> > > follow function pointers. Bad.
> >
> > We aren't aware of any code or data used by the resume path that is outside of the tboot-
> MAC'ed regions above--if you can point out any then we will gladly address them.
>
> Code coverage is not enough, you need data coverage too. If someone
> modifies kernel data it's typically easy to subvert code as a next step.
Agreed, which is why I said "code or data". We'll take another look at the couple of fns that are within this path, but if you have any specific examples can you please post them.
>
>
> -Andi
> --
> [email protected] -- Speaking for myself only.
On 12/04/2009 09:13 AM, Andi Kleen wrote:
>>>
>>> So no, you did not audit do_suspend_lowlevel to make sure it does not
>>> follow function pointers. Bad.
>>
>> We aren't aware of any code or data used by the resume path that is outside of the tboot-MAC'ed regions above--if you can point out any then we will gladly address them.
>
> Code coverage is not enough, you need data coverage too. If someone
> modifies kernel data it's typically easy to subvert code as a next step.
>
The only function pointers that are invoked on the do_suspend_lowlevel
path are some paravirt_crap pointers, but those are located inside
kernel static data.
This is not to say that this isn't a new constraint, and should be
documented, and checked ahead of time...
-hpa
--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.
On Fri, Dec 04, 2009 at 09:41:24AM -0800, Cihula, Joseph wrote:
> > From: Andi Kleen [mailto:[email protected]]
> > Sent: Friday, December 04, 2009 9:14 AM
> >
> > > "bad stuff" would be the execution of any code (or use of any data that affects execution)
> > that was not verified by tboot. As long as panic() is within the code ranges MAC'ed by tboot
> > (see above), it would be covered. Do you know of some panic() code paths that are outside of
> > this?
> >
> > Not code path, but the code called by panic (console drivers, debuggers etc.)
> > can well use data that is stored >4GB
> >
> > This can include structures with indirect pointers, like notifier chains.
> >
> > Notifier chains have a special checker than can check
> > for <4GB, but there are other call vectors too.
>
> Since, as you pointed out in a previous email, it is doubtful that there will be any user-visible output at this point, we can change this path to a tboot reset (which will give us some serial output at least). Is it going to be similarly unsafe to do a printk()?
Yes printk is similarly unsafe. It calls all the console machinery,
which has a lot of data.
Perhaps early_printk(), that is relatively self contained, but doesn't
always work.
Of course you would need to have a timeout before reset, and at this point the
delay loops are not calibrated yet, so you don't know how to wait.
-Andi
--
[email protected] -- Speaking for myself only.
On Fri, Dec 04, 2009 at 09:53:37AM -0800, H. Peter Anvin wrote:
> On 12/04/2009 09:13 AM, Andi Kleen wrote:
> >>>
> >>> So no, you did not audit do_suspend_lowlevel to make sure it does not
> >>> follow function pointers. Bad.
> >>
> >> We aren't aware of any code or data used by the resume path that is outside of the tboot-MAC'ed regions above--if you can point out any then we will gladly address them.
> >
> > Code coverage is not enough, you need data coverage too. If someone
> > modifies kernel data it's typically easy to subvert code as a next step.
> >
>
> The only function pointers that are invoked on the do_suspend_lowlevel
> path are some paravirt_crap pointers, but those are located inside
> kernel static data.
Was referring to panic(), like Pavel said.
It would be relatively easy to subvert something called by panic
that just jumps back to after the MAC checks.
-Andi
--
[email protected] -- Speaking for myself only.
> From: Andi Kleen [mailto:[email protected]]
> Sent: Friday, December 04, 2009 12:10 PM
>
> On Fri, Dec 04, 2009 at 09:41:24AM -0800, Cihula, Joseph wrote:
> > > From: Andi Kleen [mailto:[email protected]]
> > > Sent: Friday, December 04, 2009 9:14 AM
> > >
> > > > "bad stuff" would be the execution of any code (or use of any data that affects
> execution)
> > > that was not verified by tboot. As long as panic() is within the code ranges MAC'ed by
> tboot
> > > (see above), it would be covered. Do you know of some panic() code paths that are outside
> of
> > > this?
> > >
> > > Not code path, but the code called by panic (console drivers, debuggers etc.)
> > > can well use data that is stored >4GB
> > >
> > > This can include structures with indirect pointers, like notifier chains.
> > >
> > > Notifier chains have a special checker than can check
> > > for <4GB, but there are other call vectors too.
> >
> > Since, as you pointed out in a previous email, it is doubtful that there will be any user-
> visible output at this point, we can change this path to a tboot reset (which will give us
> some serial output at least). Is it going to be similarly unsafe to do a printk()?
>
> Yes printk is similarly unsafe. It calls all the console machinery,
> which has a lot of data.
>
> Perhaps early_printk(), that is relatively self contained, but doesn't
> always work.
>
> Of course you would need to have a timeout before reset, and at this point the
> delay loops are not calibrated yet, so you don't know how to wait.
I would expect that early_printk() coupled with tboot's serial output would be sufficient for a case such as this. If we've done our work correctly, loss of integrity should only occur when the system is attacked across the S3 transition--which should be fairly rare and which should place a premium on prevention of the attacked code from executing. Esp. on servers, there may not be anyone to see console output anyway. Does early_printk() and a tboot reset seem like a reasonable approach?
>
> -Andi
>
> --
> [email protected] -- Speaking for myself only.
> > Of course you would need to have a timeout before reset, and at this point the
> > delay loops are not calibrated yet, so you don't know how to wait.
That was actually wrong, since you're coming back from S3
udelay should work. nm.
>
> I would expect that early_printk() coupled with tboot's serial output would be sufficient for a case such as this. If we've done our work correctly, loss of integrity should only occur when the system is attacked across the S3 transition--which should be fairly rare and which should place a premium on prevention of the attacked code from executing. Esp. on servers, there may not be anyone to see console output anyway. Does early_printk() and a tboot reset seem like a reasonable approach?
At least classical vga/serial early_printk should be safe, I'm not sure
about the early USB code recently added though, some auditing on that
first would be good.
early_printk defaults to VGA text output, so if you do a reset you would
need a delay first, otherwise noone can see it ever. But one could be
done with udelay()
It'll be also invisible with frame buffer active, which is the common
case for distributions. So basically in most cases the message would be
invisible.
(not that panic is much better by default in this regard though,
at least not without Jesse's recent frame buffer work ...)
-Andi
--
[email protected] -- Speaking for myself only.
On 12/04/2009 12:17 PM, Cihula, Joseph wrote:
>
> I would expect that early_printk() coupled with tboot's serial output would be sufficient for a case such as this. If we've done our work correctly, loss of integrity should only occur when the system is attacked across the S3 transition--which should be fairly rare and which should place a premium on prevention of the attacked code from executing. Esp. on servers, there may not be anyone to see console output anyway. Does early_printk() and a tboot reset seem like a reasonable approach?
>
The zeroeth-order thing is you should reliably crash and reboot. This
is pretty normal for most S3 resume failures anyway, so although it is
somewhat unfortunate for debugging, it is isn't exactly an unreasonable
thing to do.
Getting a message out is a nice plus, but not a requirement.
Guaranteeing the integrity is an absolute requirement.
-hpa
On Fri 2009-12-04 08:46:04, Cihula, Joseph wrote:
> > From: Pavel Machek [mailto:[email protected]]
> > Sent: Friday, December 04, 2009 12:20 AM
> >
> > Hi!
> >
> > Please wrap mails at column 72 (or so).
> >
> > > > AFAICT, it verifies userspace _and_ kernel memory, that's why it does
> > > > magic stack switching. Why not verify everything in tboot?
> > > Because tboot only can access <4G mem without paging. And the memory is sparse. We
> > can't/needn't set unlimited sparse mem ranges to the MAC array with limited elements in the
> > shared page, in order to pass the parameters.
> > > On the other hand, it is reasonable for tboot to verify kernel, and kernel to verify
> > userspace memory.
> > >
> >
> > Are you sure x86-64 kernel & modules is always below 4GB? I don't
> > think so.
>
> The only part of the kernel that really needs to be below 4GB is what is used by the code on resume to do the MAC verification of the remainder of the memory. This is all static code and variables. The regions we pass to tboot for MAC'ing are:
> S3 real mode resume code: acpi_wakeup_address, WAKEUP_SIZE
> AP trampoline code: virt_to_phys(trampoline_base), TRAMPOLINE_SIZE
> kernel static code and data: virt_to_phys(_text), _end - _text
> the stack we use (either new one or existing depending--see code):
> virt_to_phys(new_stack), IRQ_STACK_SIZE
> virt_to_phys(current_thread_info()), THREAD_SIZE
>
Yes, so... what guarantees these actually are under 4GB? Perhaps you
should put them to spearate section and use linker magic?
You certainly should annotate them somehow so people modifying the
code know about this new requirement.
> Can you post a .config and platform configuration for which the tboot, MAC, or do_suspend_lowlevel() are not within the regions above? We'll be happy to address the issue once we understand the conditions.
>
Can you prove that for all .config s on all machines they are under
4GB? On all future kernels, too? I thought so.
> > > >> +static vmac_t mem_mac;
> > > >> +static struct crypto_hash *tfm;
> > > >
> > > > Could these be automatic?
> > > Maybe, but I don't wish other files can access the variables and take tfm as an example, I'd
> > like to allocate memory to it once and then initialize it once in order to avoid impact of
> > memory change to MACing.
> > >
> >
> > You use stack, anyway.
>
> Do you mean as static local variables? If so, I'm not sure if that would be any better.
>
Those two variables (mem_mac and tfm) are static. I do not think they
need to be, and code would be much nicer if they were not.
> > > > So I corrupt memory, but also corrupt tboot_enabled() to return 0....
> > > >
> > > > And... does panic kill the machine quickly enough that no 'bad stuff'
> > > > happens? (Whats bad stuff in this context, anyway?).
> >
> > I'd really like you to answer that.
>
> "bad stuff" would be the execution of any code (or use of any data that affects execution) that was not verified by tboot. As long as panic() is within the code ranges MAC'ed by tboot (see above), it would be covered. Do you know of some panic() code paths that are outside of this?
>
(Please wrap at column 72).
Yes, I know that panic() has problems.
> > > > Did you audit all code before sx_resume()? If it trusts data not
> > > > checksummed by tboot, attacker may be able to hijack code execution
> > > > and bypass your protection, no?
> > > Yes, kernel code is audited by tboot before resume.
> >
> > So no, you did not audit do_suspend_lowlevel to make sure it does not
> > follow function pointers. Bad.
>
> We aren't aware of any code or data used by the resume path that is outside of the tboot-MAC'ed regions above--if you can point out any then we will gladly address them.
>
I'm asking you to do the code audit, and you tell me to do it
myself. I'm not interested in doing your work. Yes, they are some
obvious places (like panic), that you missed. There are probably more.
Design your code properly so that it does not depend on details like
that. That means checksumming in tboot.
[Or do the auditing work. But don't try playing "we fixed all bugs
you told us about, so it must be perfect" game with me.]
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
> > > +
> > > + ret = tboot_pre_stack_switch();
> > > + if (!ret) {
> > > + tboot_switch_stack_call(tboot_do_suspend_lowlevel_call,
> > > + (u64)new_stack_ptr);
> >
> > ...and here you add requirements to suspend_lowlevel that were not
> > there before. ("May not act on unchecksummed memory"), without
> > documenting them.
>
> Really the only requirement is (as discussed in the previous thread) that it only use code and data within [_text, _end - _text]--do you think that this really needs to be called out? do_suspend_lowlevel() is a very simple function that has just the one (resume path) call to another simple function--it doesn't seem likely that it would violate this.
>
Resume code is already pretty tricky, and you add yet another layer of
trickery. Yes, it needs to be properly documented.
And... how does it interact with restore_processor_state? I don't
think do_suspend_lowlevel is as simple as you think...
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On 12/04/2009 02:15 PM, Pavel Machek wrote:
>>>
>>> Are you sure x86-64 kernel & modules is always below 4GB? I don't
>>> think so.
The x86-64 kernel is run where it is loaded by the boot loader. For
most boot loaders, that will mean < 4 GB. This is not the case for
modules, and they cannot and should not rely on modules inside
restricted zone.
This effectively becomes a constraint on whatever boot loader is used to
load the kernel for it to be compatible with tboot.
-hpa
On Fri 2009-12-04 09:53:37, H. Peter Anvin wrote:
> On 12/04/2009 09:13 AM, Andi Kleen wrote:
> >>>
> >>> So no, you did not audit do_suspend_lowlevel to make sure it does not
> >>> follow function pointers. Bad.
> >>
> >> We aren't aware of any code or data used by the resume path that is outside of the tboot-MAC'ed regions above--if you can point out any then we will gladly address them.
> >
> > Code coverage is not enough, you need data coverage too. If someone
> > modifies kernel data it's typically easy to subvert code as a next step.
> >
>
> The only function pointers that are invoked on the do_suspend_lowlevel
> path are some paravirt_crap pointers, but those are located inside
> kernel static data.
What guarantees kernel static data are below 4GB? What prevents me
from booting with funny memmap where first 1MB is mapped, and then
memory above 4GB? What prevents Chinese company to ship machine with
such funny memmap?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Fri 2009-12-04 14:24:24, H. Peter Anvin wrote:
> On 12/04/2009 02:15 PM, Pavel Machek wrote:
> >>>
> >>> Are you sure x86-64 kernel & modules is always below 4GB? I don't
> >>> think so.
>
> The x86-64 kernel is run where it is loaded by the boot loader. For
> most boot loaders, that will mean < 4 GB. This is not the case for
> modules, and they cannot and should not rely on modules inside
> restricted zone.
>
> This effectively becomes a constraint on whatever boot loader is used to
> load the kernel for it to be compatible with tboot.
Having "security" technology that silently fails with funny bootloader
is pretty bad, I'd say.
Instead of doing this properly (in tboot), Joseph hopes to save some
work by basically splitting kernel into two parts, "trusted" and
"untrusted". But doing that properly would be too much work, so he
just handwaves and hopes for the best.
Unfortunately that
a) does not work (panic, printks)
b) places funny constraints all over the code (documenting them would
be too much work, so we get more handwaving)
c) reduces future flexibility (trusted code can not be over 4GB, it is
silent security hole when it is)
I'd prefer to see this done properly; tboot should simply verify all
the memory for us. It is separate piece of code, trusted, and can
probably fit itself under 4GB.
If that seems like too much work, then please go all over the code,
and mark all the code that is "trusted" (has to be under 4GB) and
properly document it, so that future modifications will not break that
assumption. Putting all that code into one section should be enough.
(Going into infinite loop is probably enough when memory corruption is
detected; there's no chance you can put printk/panic/neccessary
drivers all into the "trusted" section.)
Aha, and look. Your tboot_gen_mem_integrity is ran on resume, so it is
"trusted". Unfortunately, it calls crypto_alloc_hash, so you need to
audit that, and probably kmalloc it boils down into. I bet kmalloc
touches memory >4GB. And I bet crypto modules can be
... well... modules. That means over 4GB.
This is broken by design, right?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On 12/04/2009 02:39 PM, Pavel Machek wrote:
>
> Having "security" technology that silently fails with funny bootloader
> is pretty bad, I'd say.
>
Yes, but this wouldn't be a silent failure -- such a boot loader
wouldn't be able to boot tboot itself either, nor would be able to boot
32-bit kernels (which, in fact, not all boot loaders can); the tboot
boot process in fact in many ways treats tboot itself as a 32-bit
primary kernel, with the Linux kernel as a secondary kernel.
So, this particular failure would not be silent by any means.
-hpa