Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--   arch/x86/kernel/cpu/sgx/driver.h   |  1
-rw-r--r--   arch/x86/kernel/cpu/sgx/ioctl.c    | 30
-rw-r--r--   arch/x86/kernel/cpu/sgx/main.c     |  2
-rw-r--r--   arch/x86/kernel/crash.c            | 26
-rw-r--r--   arch/x86/kernel/e820.c             | 18
-rw-r--r--   arch/x86/kernel/kexec-bzimage64.c  | 58
-rw-r--r--   arch/x86/kernel/machine_kexec_64.c | 22
-rw-r--r--   arch/x86/kernel/setup.c            | 42
-rw-r--r--   arch/x86/kernel/smpboot.c          | 54
9 files changed, 207 insertions, 46 deletions
diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h
index 4eddb4d571ef..30f39f92c98f 100644
--- a/arch/x86/kernel/cpu/sgx/driver.h
+++ b/arch/x86/kernel/cpu/sgx/driver.h
@@ -2,7 +2,6 @@
 #ifndef __ARCH_SGX_DRIVER_H__
 #define __ARCH_SGX_DRIVER_H__
 
-#include <crypto/hash.h>
 #include <linux/kref.h>
 #include <linux/mmu_notifier.h>
 #include <linux/radix-tree.h>
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 776a20172867..66f1efa16fbb 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -3,6 +3,7 @@
 
 #include <asm/mman.h>
 #include <asm/sgx.h>
+#include <crypto/sha2.h>
 #include <linux/mman.h>
 #include <linux/delay.h>
 #include <linux/file.h>
@@ -463,31 +464,6 @@ static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
 	return ret;
 }
 
-static int __sgx_get_key_hash(struct crypto_shash *tfm, const void *modulus,
-			      void *hash)
-{
-	SHASH_DESC_ON_STACK(shash, tfm);
-
-	shash->tfm = tfm;
-
-	return crypto_shash_digest(shash, modulus, SGX_MODULUS_SIZE, hash);
-}
-
-static int sgx_get_key_hash(const void *modulus, void *hash)
-{
-	struct crypto_shash *tfm;
-	int ret;
-
-	tfm = crypto_alloc_shash("sha256", 0, CRYPTO_ALG_ASYNC);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	ret = __sgx_get_key_hash(tfm, modulus, hash);
-
-	crypto_free_shash(tfm);
-	return ret;
-}
-
 static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
 			 void *token)
 {
@@ -523,9 +499,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
 	    sgx_xfrm_reserved_mask)
 		return -EINVAL;
 
-	ret = sgx_get_key_hash(sigstruct->modulus, mrsigner);
-	if (ret)
-		return ret;
+	sha256(sigstruct->modulus, SGX_MODULUS_SIZE, (u8 *)mrsigner);
 
 	mutex_lock(&encl->lock);
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 6722b2fc82cf..2de01b379aa3 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -720,6 +720,8 @@ int arch_memory_failure(unsigned long pfn, int flags)
 		goto out;
 	}
 
+	sgx_unmark_page_reclaimable(page);
+
 	/*
 	 * TBD: Add additional plumbing to enable pre-emptive
 	 * action for asynchronous poison notification.
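In the ioctl.c hunk above, the open-coded crypto_shash sequence (allocate a "sha256" transform, run a one-shot digest, free the transform) collapses into a single call to the SHA-256 library interface from <crypto/sha2.h>, which needs no allocation and cannot fail. A minimal sketch of the same computation, assuming the library routine takes the data, its length, and a 32-byte output buffer; the hash_modulus() wrapper is hypothetical, not part of the patch:

    #include <crypto/sha2.h>

    /* Hypothetical wrapper, for illustration only: digest an SGX
     * signing-key modulus into a 32-byte MRSIGNER value.
     */
    static void hash_modulus(const void *modulus, size_t len,
                             u8 digest[SHA256_DIGEST_SIZE])
    {
            /* No crypto_alloc_shash()/crypto_free_shash(), and no
             * failure path: the library call computes the digest
             * directly into the caller's buffer.
             */
            sha256(modulus, len, digest);
    }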
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 0be61c45400c..bcb534688dfe 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -278,6 +278,7 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
 				 unsigned long long mend)
 {
 	unsigned long start, end;
+	int ret;
 
 	cmem->ranges[0].start = mstart;
 	cmem->ranges[0].end = mend;
@@ -286,22 +287,43 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
 	/* Exclude elf header region */
 	start = image->elf_load_addr;
 	end = start + image->elf_headers_sz - 1;
-	return crash_exclude_mem_range(cmem, start, end);
+	ret = crash_exclude_mem_range(cmem, start, end);
+
+	if (ret)
+		return ret;
+
+	/* Exclude dm crypt keys region */
+	if (image->dm_crypt_keys_addr) {
+		start = image->dm_crypt_keys_addr;
+		end = start + image->dm_crypt_keys_sz - 1;
+		return crash_exclude_mem_range(cmem, start, end);
+	}
+
+	return ret;
 }
 
 /* Prepare memory map for crash dump kernel */
 int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
 {
+	unsigned int nr_ranges = 0;
 	int i, ret = 0;
 	unsigned long flags;
 	struct e820_entry ei;
 	struct crash_memmap_data cmd;
 	struct crash_mem *cmem;
 
-	cmem = vzalloc(struct_size(cmem, ranges, 1));
+	/*
+	 * Using random kexec_buf for passing dm crypt keys may cause a range
+	 * split. So use two slots here.
+	 */
+	nr_ranges = 2;
+	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
 	if (!cmem)
 		return -ENOMEM;
 
+	cmem->max_nr_ranges = nr_ranges;
+	cmem->nr_ranges = 0;
+
 	memset(&cmd, 0, sizeof(struct crash_memmap_data));
 	cmd.params = params;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 9920122018a0..c3acbd26408b 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1300,6 +1300,24 @@ void __init e820__memblock_setup(void)
 	}
 
 	/*
+	 * At this point memblock is only allowed to allocate from memory
+	 * below 1M (aka ISA_END_ADDRESS) up until direct map is completely set
+	 * up in init_mem_mapping().
+	 *
+	 * KHO kernels are special and use only scratch memory for memblock
+	 * allocations, but memory below 1M is ignored by kernel after early
+	 * boot and cannot be naturally marked as scratch.
+	 *
+	 * To allow allocation of the real-mode trampoline and a few (if any)
+	 * other very early allocations from below 1M forcibly mark the memory
+	 * below 1M as scratch.
+	 *
+	 * After real mode trampoline is allocated, we clear that scratch
+	 * marking.
+	 */
+	memblock_mark_kho_scratch(0, SZ_1M);
+
+	/*
 	 * 32-bit systems are limited to 4GB of memory even with HIGHMEM and
 	 * to even less without it.
 	 * Discard memory after max_pfn - the actual limit detected at runtime.
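The "two slots" comment in the crash.c hunk above is terse. The reason one extra slot suffices: the dm-crypt key segment is placed by kexec at an effectively random address inside the crash kernel region, and excluding a region that falls strictly inside a single [mstart, mend] range leaves two disjoint pieces. A simplified sketch of that interior case, assuming a two-slot array as in the patch; the real crash_exclude_mem_range() in the crash core also handles head/tail overlaps and slot exhaustion:

    struct range { unsigned long long start, end; };

    /* Exclude [ex_start, ex_end] from r[0]; returns how many slots
     * are in use afterwards.  Only the interior case is shown: it is
     * the one that forces max_nr_ranges == 2 above.
     */
    static int exclude_interior(struct range r[2],
                                unsigned long long ex_start,
                                unsigned long long ex_end)
    {
            if (ex_start > r[0].start && ex_end < r[0].end) {
                    r[1].start = ex_end + 1;        /* tail piece */
                    r[1].end   = r[0].end;
                    r[0].end   = ex_start - 1;      /* head piece */
                    return 2;
            }
            return 1;   /* head/tail overlaps merely shrink r[0] */
    }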
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 68530fad05f7..24a41f0e0cf1 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -27,6 +27,8 @@
 #include <asm/kexec-bzimage64.h>
 
 #define MAX_ELFCOREHDR_STR_LEN	30	/* elfcorehdr=0x<64bit-value> */
+#define MAX_DMCRYPTKEYS_STR_LEN	31	/* dmcryptkeys=0x<64bit-value> */
+
 /*
  * Defines lowest physical address for various segments.
@@ -76,6 +78,10 @@ static int setup_cmdline(struct kimage *image, struct boot_params *params,
 	if (image->type == KEXEC_TYPE_CRASH) {
 		len = sprintf(cmdline_ptr,
 			"elfcorehdr=0x%lx ", image->elf_load_addr);
+
+		if (image->dm_crypt_keys_addr != 0)
+			len += sprintf(cmdline_ptr + len,
+					"dmcryptkeys=0x%lx ", image->dm_crypt_keys_addr);
 	}
 	memcpy(cmdline_ptr + len, cmdline, cmdline_len);
 	cmdline_len += len;
@@ -233,6 +239,32 @@ setup_ima_state(const struct kimage *image, struct boot_params *params,
 #endif /* CONFIG_IMA_KEXEC */
 }
 
+static void setup_kho(const struct kimage *image, struct boot_params *params,
+		      unsigned long params_load_addr,
+		      unsigned int setup_data_offset)
+{
+	struct setup_data *sd = (void *)params + setup_data_offset;
+	struct kho_data *kho = (void *)sd + sizeof(*sd);
+
+	if (!IS_ENABLED(CONFIG_KEXEC_HANDOVER))
+		return;
+
+	sd->type = SETUP_KEXEC_KHO;
+	sd->len = sizeof(struct kho_data);
+
+	/* Only add if we have all KHO images in place */
+	if (!image->kho.fdt || !image->kho.scratch)
+		return;
+
+	/* Add setup data */
+	kho->fdt_addr = image->kho.fdt;
+	kho->fdt_size = PAGE_SIZE;
+	kho->scratch_addr = image->kho.scratch->mem;
+	kho->scratch_size = image->kho.scratch->bufsz;
+	sd->next = params->hdr.setup_data;
+	params->hdr.setup_data = params_load_addr + setup_data_offset;
+}
+
 static int
 setup_boot_parameters(struct kimage *image, struct boot_params *params,
 		      unsigned long params_load_addr,
@@ -312,6 +344,13 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
 				      sizeof(struct ima_setup_data);
 	}
 
+	if (IS_ENABLED(CONFIG_KEXEC_HANDOVER)) {
+		/* Setup space to store preservation metadata */
+		setup_kho(image, params, params_load_addr, setup_data_offset);
+		setup_data_offset += sizeof(struct setup_data) +
+				     sizeof(struct kho_data);
+	}
+
 	/* Setup RNG seed */
 	setup_rng_seed(params, params_load_addr, setup_data_offset);
 
@@ -441,6 +480,19 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 		ret = crash_load_segments(image);
 		if (ret)
 			return ERR_PTR(ret);
+		ret = crash_load_dm_crypt_keys(image);
+		if (ret == -ENOENT) {
+			kexec_dprintk("No dm crypt key to load\n");
+		} else if (ret) {
+			pr_err("Failed to load dm crypt keys\n");
+			return ERR_PTR(ret);
+		}
+		if (image->dm_crypt_keys_addr &&
+		    cmdline_len + MAX_ELFCOREHDR_STR_LEN + MAX_DMCRYPTKEYS_STR_LEN >
+		    header->cmdline_size) {
+			pr_err("Appending dmcryptkeys=<addr> to command line exceeds maximum allowed length\n");
+			return ERR_PTR(-EINVAL);
+		}
 	}
 #endif
 
@@ -468,6 +520,8 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 	efi_map_sz = efi_get_runtime_map_size();
 	params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
 				MAX_ELFCOREHDR_STR_LEN;
+	if (image->dm_crypt_keys_addr)
+		params_cmdline_sz += MAX_DMCRYPTKEYS_STR_LEN;
 	params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
 	kbuf.bufsz = params_cmdline_sz + ALIGN(efi_map_sz, 16) +
 				sizeof(struct setup_data) +
@@ -479,6 +533,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 		kbuf.bufsz += sizeof(struct setup_data) +
 			      sizeof(struct ima_setup_data);
 
+	if (IS_ENABLED(CONFIG_KEXEC_HANDOVER))
+		kbuf.bufsz += sizeof(struct setup_data) +
+			      sizeof(struct kho_data);
+
 	params = kzalloc(kbuf.bufsz, GFP_KERNEL);
 	if (!params)
 		return ERR_PTR(-ENOMEM);
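setup_kho() publishes the KHO metadata through the boot_params setup_data chain, a singly linked list that stores physical addresses: the node is assembled inside the staged parameter buffer, so the value pushed into the list head is the node's eventual physical location, params_load_addr + setup_data_offset. A condensed sketch of the pattern; setup_data_prepend() is an illustrative name, not a kernel helper:

    /* Prepend one node onto the boot_params setup_data list.  The
     * node lives at (params + offset) in the kexec-staged buffer, so
     * after the segments are loaded its physical address is
     * load_addr + offset, and that, not a virtual pointer, is what
     * the list stores.
     */
    static void setup_data_prepend(struct boot_params *params,
                                   struct setup_data *node,
                                   unsigned long load_addr,
                                   unsigned int offset)
    {
            node->next = params->hdr.setup_data;         /* old head */
            params->hdr.setup_data = load_addr + offset; /* new head */
    }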
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 949c9e4bfad2..697fb99406e6 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -630,13 +630,35 @@ static void kexec_mark_crashkres(bool protect)
 	kexec_mark_range(control, crashk_res.end, protect);
 }
 
+/* make the memory storing dm crypt keys in/accessible */
+static void kexec_mark_dm_crypt_keys(bool protect)
+{
+	unsigned long start_paddr, end_paddr;
+	unsigned int nr_pages;
+
+	if (kexec_crash_image->dm_crypt_keys_addr) {
+		start_paddr = kexec_crash_image->dm_crypt_keys_addr;
+		end_paddr = start_paddr + kexec_crash_image->dm_crypt_keys_sz - 1;
+		nr_pages = (PAGE_ALIGN(end_paddr) - PAGE_ALIGN_DOWN(start_paddr)) / PAGE_SIZE;
+		if (protect)
+			set_memory_np((unsigned long)phys_to_virt(start_paddr), nr_pages);
+		else
+			__set_memory_prot(
+				(unsigned long)phys_to_virt(start_paddr),
+				nr_pages,
+				__pgprot(_PAGE_PRESENT | _PAGE_NX | _PAGE_RW));
+	}
+}
+
 void arch_kexec_protect_crashkres(void)
 {
 	kexec_mark_crashkres(true);
+	kexec_mark_dm_crypt_keys(true);
 }
 
 void arch_kexec_unprotect_crashkres(void)
 {
+	kexec_mark_dm_crypt_keys(false);
 	kexec_mark_crashkres(false);
 }
 #endif
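kexec_mark_dm_crypt_keys() flips whole pages, so it rounds the inclusive end address up and the start address down before counting, covering every page the key buffer touches. A userspace re-derivation of that arithmetic, with the two alignment macros re-implemented here under an assumed 4 KiB page size:

    #include <stdio.h>

    #define PAGE_SIZE          0x1000UL
    #define PAGE_ALIGN(x)      (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
    #define PAGE_ALIGN_DOWN(x) ((x) & ~(PAGE_SIZE - 1))

    int main(void)
    {
            unsigned long start = 0x1800, size = 0x2000; /* sample buffer */
            unsigned long end = start + size - 1;   /* inclusive, as in the patch */
            unsigned long nr = (PAGE_ALIGN(end) - PAGE_ALIGN_DOWN(start)) / PAGE_SIZE;

            /* 0x1800..0x37ff touches pages 0x1000, 0x2000 and 0x3000 */
            printf("nr_pages = %lu\n", nr);         /* prints 3 */
            return 0;
    }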
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 7d9ed79a93c0..fb27be697128 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -282,8 +282,8 @@ static void __init cleanup_highmap(void)
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
-		memblock_reserve(__pa_symbol(_brk_start),
-				 _brk_end - _brk_start);
+		memblock_reserve_kern(__pa_symbol(_brk_start),
+				      _brk_end - _brk_start);
 
 	/* Mark brk area as locked down and no longer taking any
 	   new allocations */
@@ -356,7 +356,7 @@ static void __init early_reserve_initrd(void)
 	    !ramdisk_image || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
 
-	memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
+	memblock_reserve_kern(ramdisk_image, ramdisk_end - ramdisk_image);
 }
 
 static void __init reserve_initrd(void)
@@ -409,7 +409,7 @@ static void __init add_early_ima_buffer(u64 phys_addr)
 	}
 
 	if (data->size) {
-		memblock_reserve(data->addr, data->size);
+		memblock_reserve_kern(data->addr, data->size);
 		ima_kexec_buffer_phys = data->addr;
 		ima_kexec_buffer_size = data->size;
 	}
@@ -447,6 +447,29 @@ int __init ima_get_kexec_buffer(void **addr, size_t *size)
 }
 #endif
 
+static void __init add_kho(u64 phys_addr, u32 data_len)
+{
+	struct kho_data *kho;
+	u64 addr = phys_addr + sizeof(struct setup_data);
+	u64 size = data_len - sizeof(struct setup_data);
+
+	if (!IS_ENABLED(CONFIG_KEXEC_HANDOVER)) {
+		pr_warn("Passed KHO data, but CONFIG_KEXEC_HANDOVER not set. Ignoring.\n");
+		return;
+	}
+
+	kho = early_memremap(addr, size);
+	if (!kho) {
+		pr_warn("setup: failed to memremap kho data (0x%llx, 0x%llx)\n",
+			addr, size);
+		return;
+	}
+
+	kho_populate(kho->fdt_addr, kho->fdt_size, kho->scratch_addr, kho->scratch_size);
+
+	early_memunmap(kho, size);
+}
+
 static void __init parse_setup_data(void)
 {
 	struct setup_data *data;
@@ -475,6 +498,9 @@ static void __init parse_setup_data(void)
 		case SETUP_IMA:
 			add_early_ima_buffer(pa_data);
 			break;
+		case SETUP_KEXEC_KHO:
+			add_kho(pa_data, data_len);
+			break;
 		case SETUP_RNG_SEED:
 			data = early_memremap(pa_data, data_len);
 			add_bootloader_randomness(data->data, data->len);
@@ -549,7 +575,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 		len = sizeof(*data);
 		pa_next = data->next;
 
-		memblock_reserve(pa_data, sizeof(*data) + data->len);
+		memblock_reserve_kern(pa_data, sizeof(*data) + data->len);
 
 		if (data->type == SETUP_INDIRECT) {
 			len += data->len;
@@ -563,7 +589,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
 			indirect = (struct setup_indirect *)data->data;
 
 			if (indirect->type != SETUP_INDIRECT)
-				memblock_reserve(indirect->addr, indirect->len);
+				memblock_reserve_kern(indirect->addr, indirect->len);
 		}
 
 		pa_data = pa_next;
@@ -766,8 +792,8 @@ static void __init early_reserve_memory(void)
 	 * __end_of_kernel_reserve symbol must be explicitly reserved with a
 	 * separate memblock_reserve() or they will be discarded.
 	 */
-	memblock_reserve(__pa_symbol(_text),
-			 (unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
+	memblock_reserve_kern(__pa_symbol(_text),
+			      (unsigned long)__end_of_kernel_reserve - (unsigned long)_text);
 
 	/*
 	 * The first 4Kb of memory is a BIOS owned area, but generally it is
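add_kho() is reached from parse_setup_data(), which walks the physical-address chain the previous kernel built: map just the node header, capture type, length and next pointer, unmap, then dispatch on the type. A condensed sketch of that walk; walk_setup_data() is an illustrative condensation (the real loop also unpacks SETUP_INDIRECT nodes), not a kernel function:

    static void __init walk_setup_data(void)
    {
            u64 pa_data = boot_params.hdr.setup_data;

            while (pa_data) {
                    struct setup_data *data;
                    u32 data_len, data_type;
                    u64 pa_next;

                    data = early_memremap(pa_data, sizeof(*data));
                    data_len  = data->len + sizeof(struct setup_data);
                    data_type = data->type;
                    pa_next   = data->next;
                    early_memunmap(data, sizeof(*data));

                    if (data_type == SETUP_KEXEC_KHO)
                            add_kho(pa_data, data_len); /* new case above */

                    pa_data = pa_next;
            }
    }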
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b90d872aa0c8..1ba92ac9441d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1244,10 +1244,6 @@ void play_dead_common(void)
 	local_irq_disable();
 }
 
-/*
- * We need to flush the caches before going to sleep, lest we have
- * dirty data in our caches when we come back up.
- */
 void __noreturn mwait_play_dead(unsigned int eax_hint)
 {
 	struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
@@ -1294,6 +1290,50 @@ void __noreturn mwait_play_dead(unsigned int eax_hint)
 }
 
 /*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
+static inline void mwait_play_dead_cpuid_hint(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int highest_cstate = 0;
+	unsigned int highest_subcstate = 0;
+	int i;
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+	    boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+		return;
+	if (!this_cpu_has(X86_FEATURE_MWAIT))
+		return;
+	if (!this_cpu_has(X86_FEATURE_CLFLUSH))
+		return;
+
+	eax = CPUID_LEAF_MWAIT;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	/*
+	 * eax will be 0 if EDX enumeration is not valid.
+	 * Initialized below to cstate, sub_cstate value when EDX is valid.
+	 */
+	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+		eax = 0;
+	} else {
+		edx >>= MWAIT_SUBSTATE_SIZE;
+		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+			if (edx & MWAIT_SUBSTATE_MASK) {
+				highest_cstate = i;
+				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+			}
+		}
+		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+		      (highest_subcstate - 1);
+	}
+
+	mwait_play_dead(eax);
+}
+
+/*
  * Kick all "offline" CPUs out of mwait on kexec(). See comment in
  * mwait_play_dead().
  */
@@ -1343,9 +1383,9 @@ void native_play_dead(void)
 	play_dead_common();
 	tboot_shutdown(TB_SHUTDOWN_WFS);
 
-	/* Below returns only on error. */
-	cpuidle_play_dead();
-	hlt_play_dead();
+	mwait_play_dead_cpuid_hint();
+	if (cpuidle_play_dead())
+		hlt_play_dead();
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */
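The hint built by mwait_play_dead_cpuid_hint() packs the target C-state into bits 7:4 and the zero-based sub-state into bits 3:0. CPUID leaf 0x5 EDX advertises one 4-bit sub-state count per C-state, C0 in the lowest nibble, which the initial shift discards. A userspace re-derivation of the loop, assuming the kernel's MWAIT_SUBSTATE_SIZE == 4 and MWAIT_SUBSTATE_MASK == 0xf:

    #include <stdio.h>

    /* Re-derive the MWAIT hint from a sample CPUID(0x5).EDX value. */
    static unsigned int mwait_hint(unsigned int edx)
    {
            unsigned int highest_cstate = 0, highest_subcstate = 0;
            int i;

            edx >>= 4;      /* drop the C0 sub-state count */
            for (i = 0; i < 7 && edx; i++, edx >>= 4) {
                    if (edx & 0xf) {        /* C-state i has sub-states */
                            highest_cstate = i;
                            highest_subcstate = edx & 0xf;
                    }
            }
            return (highest_cstate << 4) | (highest_subcstate - 1);
    }

    int main(void)
    {
            /* EDX = 0x220: C1 and C2 each advertise two sub-states, so
             * the deepest choice is C2, sub-state 2:
             * hint = (1 << 4) | (2 - 1) = 0x11.
             */
            printf("hint = 0x%x\n", mwait_hint(0x220));
            return 0;
    }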