summaryrefslogtreecommitdiff
path: root/kernel/kexec_handover.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/kexec_handover.c')
-rw-r--r--kernel/kexec_handover.c453
1 files changed, 392 insertions, 61 deletions
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index ecd1ac210dbd..76f0940fb485 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -18,6 +18,7 @@
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/page-isolation.h>
+#include <linux/vmalloc.h>
#include <asm/early_ioremap.h>
@@ -32,6 +33,22 @@
#define PROP_PRESERVED_MEMORY_MAP "preserved-memory-map"
#define PROP_SUB_FDT "fdt"
+#define KHO_PAGE_MAGIC 0x4b484f50U /* ASCII for 'KHOP' */
+
+/*
+ * KHO uses page->private, which is an unsigned long, to store page metadata.
+ * Use it to store both the magic and the order.
+ */
+union kho_page_info {
+ unsigned long page_private;
+ struct {
+ unsigned int order;
+ unsigned int magic;
+ };
+};
+
+static_assert(sizeof(union kho_page_info) == sizeof(((struct page *)0)->private));
+
static bool kho_enable __ro_after_init;
bool kho_is_enabled(void)
@@ -91,6 +108,29 @@ struct kho_serialization {
struct khoser_mem_chunk *preserved_mem_map;
};
+struct kho_out {
+ struct blocking_notifier_head chain_head;
+
+ struct dentry *dir;
+
+ struct mutex lock; /* protects KHO FDT finalization */
+
+ struct kho_serialization ser;
+ bool finalized;
+};
+
+static struct kho_out kho_out = {
+ .chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
+ .lock = __MUTEX_INITIALIZER(kho_out.lock),
+ .ser = {
+ .fdt_list = LIST_HEAD_INIT(kho_out.ser.fdt_list),
+ .track = {
+ .orders = XARRAY_INIT(kho_out.ser.track.orders, 0),
+ },
+ },
+ .finalized = false,
+};
+
static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
{
void *elm, *res;
@@ -149,6 +189,9 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
might_sleep();
+ if (kho_out.finalized)
+ return -EBUSY;
+
physxa = xa_load(&track->orders, order);
if (!physxa) {
int err;
@@ -183,11 +226,27 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn,
return 0;
}
-/* almost as free_reserved_page(), just don't free the page */
-static void kho_restore_page(struct page *page, unsigned int order)
+static struct page *kho_restore_page(phys_addr_t phys)
{
- unsigned int nr_pages = (1 << order);
+ struct page *page = pfn_to_online_page(PHYS_PFN(phys));
+ union kho_page_info info;
+ unsigned int nr_pages;
+
+ if (!page)
+ return NULL;
+
+ info.page_private = page->private;
+ /*
+ * deserialize_bitmap() only sets the magic on the head page. This magic
+ * check also implicitly makes sure phys is order-aligned since for
+ * non-order-aligned phys addresses, magic will never be set.
+ */
+ if (WARN_ON_ONCE(info.magic != KHO_PAGE_MAGIC || info.order > MAX_PAGE_ORDER))
+ return NULL;
+ nr_pages = (1 << info.order);
+ /* Clear private to make sure later restores on this page error out. */
+ page->private = 0;
/* Head page gets refcount of 1. */
set_page_count(page, 1);
@@ -195,10 +254,11 @@ static void kho_restore_page(struct page *page, unsigned int order)
for (unsigned int i = 1; i < nr_pages; i++)
set_page_count(page + i, 0);
- if (order > 0)
- prep_compound_page(page, order);
+ if (info.order > 0)
+ prep_compound_page(page, info.order);
adjust_managed_page_count(page, nr_pages);
+ return page;
}
/**
@@ -209,20 +269,42 @@ static void kho_restore_page(struct page *page, unsigned int order)
*/
struct folio *kho_restore_folio(phys_addr_t phys)
{
- struct page *page = pfn_to_online_page(PHYS_PFN(phys));
- unsigned long order;
+ struct page *page = kho_restore_page(phys);
- if (!page)
- return NULL;
+ return page ? page_folio(page) : NULL;
+}
+EXPORT_SYMBOL_GPL(kho_restore_folio);
- order = page->private;
- if (order > MAX_PAGE_ORDER)
- return NULL;
+/**
+ * kho_restore_pages - restore list of contiguous order 0 pages.
+ * @phys: physical address of the first page.
+ * @nr_pages: number of pages.
+ *
+ * Restore a contiguous list of order 0 pages that was preserved with
+ * kho_preserve_pages().
+ *
+ * Return: 0 on success, error code on failure
+ */
+struct page *kho_restore_pages(phys_addr_t phys, unsigned int nr_pages)
+{
+ const unsigned long start_pfn = PHYS_PFN(phys);
+ const unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long pfn = start_pfn;
- kho_restore_page(page, order);
- return page_folio(page);
+ while (pfn < end_pfn) {
+ const unsigned int order =
+ min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));
+ struct page *page = kho_restore_page(PFN_PHYS(pfn));
+
+ if (!page)
+ return NULL;
+ split_page(page, order);
+ pfn += 1 << order;
+ }
+
+ return pfn_to_page(start_pfn);
}
-EXPORT_SYMBOL_GPL(kho_restore_folio);
+EXPORT_SYMBOL_GPL(kho_restore_pages);
/* Serialize and deserialize struct kho_mem_phys across kexec
*
@@ -341,10 +423,13 @@ static void __init deserialize_bitmap(unsigned int order,
phys_addr_t phys =
elm->phys_start + (bit << (order + PAGE_SHIFT));
struct page *page = phys_to_page(phys);
+ union kho_page_info info;
memblock_reserve(phys, sz);
memblock_reserved_mark_noinit(phys, sz);
- page->private = order;
+ info.magic = KHO_PAGE_MAGIC;
+ info.order = order;
+ page->private = info.page_private;
}
}
@@ -405,6 +490,7 @@ static int __init kho_parse_scratch_size(char *p)
{
size_t len;
unsigned long sizes[3];
+ size_t total_size = 0;
int i;
if (!p)
@@ -441,11 +527,19 @@ static int __init kho_parse_scratch_size(char *p)
}
sizes[i] = memparse(p, &endp);
- if (!sizes[i] || endp == p)
+ if (endp == p)
return -EINVAL;
p = endp;
+ total_size += sizes[i];
}
+ if (!total_size)
+ return -EINVAL;
+
+ /* The string should be fully consumed by now. */
+ if (*p)
+ return -EINVAL;
+
scratch_size_lowmem = sizes[0];
scratch_size_global = sizes[1];
scratch_size_pernode = sizes[2];
@@ -631,29 +725,6 @@ int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt)
}
EXPORT_SYMBOL_GPL(kho_add_subtree);
-struct kho_out {
- struct blocking_notifier_head chain_head;
-
- struct dentry *dir;
-
- struct mutex lock; /* protects KHO FDT finalization */
-
- struct kho_serialization ser;
- bool finalized;
-};
-
-static struct kho_out kho_out = {
- .chain_head = BLOCKING_NOTIFIER_INIT(kho_out.chain_head),
- .lock = __MUTEX_INITIALIZER(kho_out.lock),
- .ser = {
- .fdt_list = LIST_HEAD_INIT(kho_out.ser.fdt_list),
- .track = {
- .orders = XARRAY_INIT(kho_out.ser.track.orders, 0),
- },
- },
- .finalized = false,
-};
-
int register_kho_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&kho_out.chain_head, nb);
@@ -681,37 +752,28 @@ int kho_preserve_folio(struct folio *folio)
const unsigned int order = folio_order(folio);
struct kho_mem_track *track = &kho_out.ser.track;
- if (kho_out.finalized)
- return -EBUSY;
-
return __kho_preserve_order(track, pfn, order);
}
EXPORT_SYMBOL_GPL(kho_preserve_folio);
/**
- * kho_preserve_phys - preserve a physically contiguous range across kexec.
- * @phys: physical address of the range.
- * @size: size of the range.
+ * kho_preserve_pages - preserve contiguous pages across kexec
+ * @page: first page in the list.
+ * @nr_pages: number of pages.
*
- * Instructs KHO to preserve the memory range from @phys to @phys + @size
- * across kexec.
+ * Preserve a contiguous list of order 0 pages. Must be restored using
+ * kho_restore_pages() to ensure the pages are restored properly as order 0.
*
* Return: 0 on success, error code on failure
*/
-int kho_preserve_phys(phys_addr_t phys, size_t size)
+int kho_preserve_pages(struct page *page, unsigned int nr_pages)
{
- unsigned long pfn = PHYS_PFN(phys);
+ struct kho_mem_track *track = &kho_out.ser.track;
+ const unsigned long start_pfn = page_to_pfn(page);
+ const unsigned long end_pfn = start_pfn + nr_pages;
+ unsigned long pfn = start_pfn;
unsigned long failed_pfn = 0;
- const unsigned long start_pfn = pfn;
- const unsigned long end_pfn = PHYS_PFN(phys + size);
int err = 0;
- struct kho_mem_track *track = &kho_out.ser.track;
-
- if (kho_out.finalized)
- return -EBUSY;
-
- if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
- return -EINVAL;
while (pfn < end_pfn) {
const unsigned int order =
@@ -731,7 +793,256 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
return err;
}
-EXPORT_SYMBOL_GPL(kho_preserve_phys);
+EXPORT_SYMBOL_GPL(kho_preserve_pages);
+
+struct kho_vmalloc_hdr {
+ DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *);
+};
+
+#define KHO_VMALLOC_SIZE \
+ ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \
+ sizeof(phys_addr_t))
+
+struct kho_vmalloc_chunk {
+ struct kho_vmalloc_hdr hdr;
+ phys_addr_t phys[KHO_VMALLOC_SIZE];
+};
+
+static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE);
+
+/* vmalloc flags KHO supports */
+#define KHO_VMALLOC_SUPPORTED_FLAGS (VM_ALLOC | VM_ALLOW_HUGE_VMAP)
+
+/* KHO internal flags for vmalloc preservations */
+#define KHO_VMALLOC_ALLOC 0x0001
+#define KHO_VMALLOC_HUGE_VMAP 0x0002
+
+static unsigned short vmalloc_flags_to_kho(unsigned int vm_flags)
+{
+ unsigned short kho_flags = 0;
+
+ if (vm_flags & VM_ALLOC)
+ kho_flags |= KHO_VMALLOC_ALLOC;
+ if (vm_flags & VM_ALLOW_HUGE_VMAP)
+ kho_flags |= KHO_VMALLOC_HUGE_VMAP;
+
+ return kho_flags;
+}
+
+static unsigned int kho_flags_to_vmalloc(unsigned short kho_flags)
+{
+ unsigned int vm_flags = 0;
+
+ if (kho_flags & KHO_VMALLOC_ALLOC)
+ vm_flags |= VM_ALLOC;
+ if (kho_flags & KHO_VMALLOC_HUGE_VMAP)
+ vm_flags |= VM_ALLOW_HUGE_VMAP;
+
+ return vm_flags;
+}
+
+static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur)
+{
+ struct kho_vmalloc_chunk *chunk;
+ int err;
+
+ chunk = (struct kho_vmalloc_chunk *)get_zeroed_page(GFP_KERNEL);
+ if (!chunk)
+ return NULL;
+
+ err = kho_preserve_pages(virt_to_page(chunk), 1);
+ if (err)
+ goto err_free;
+ if (cur)
+ KHOSER_STORE_PTR(cur->hdr.next, chunk);
+ return chunk;
+
+err_free:
+ free_page((unsigned long)chunk);
+ return NULL;
+}
+
+static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk)
+{
+ struct kho_mem_track *track = &kho_out.ser.track;
+ unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
+
+ __kho_unpreserve(track, pfn, pfn + 1);
+
+ for (int i = 0; chunk->phys[i]; i++) {
+ pfn = PHYS_PFN(chunk->phys[i]);
+ __kho_unpreserve(track, pfn, pfn + 1);
+ }
+}
+
+static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc)
+{
+ struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(kho_vmalloc->first);
+
+ while (chunk) {
+ struct kho_vmalloc_chunk *tmp = chunk;
+
+ kho_vmalloc_unpreserve_chunk(chunk);
+
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ free_page((unsigned long)tmp);
+ }
+}
+
+/**
+ * kho_preserve_vmalloc - preserve memory allocated with vmalloc() across kexec
+ * @ptr: pointer to the area in vmalloc address space
+ * @preservation: placeholder for preservation metadata
+ *
+ * Instructs KHO to preserve the area in vmalloc address space at @ptr. The
+ * physical pages mapped at @ptr will be preserved and on successful return
+ * @preservation will hold the physical address of a structure that describes
+ * the preservation.
+ *
+ * NOTE: The memory allocated with vmalloc_node() variants cannot be reliably
+ * restored on the same node
+ *
+ * Return: 0 on success, error code on failure
+ */
+int kho_preserve_vmalloc(void *ptr, struct kho_vmalloc *preservation)
+{
+ struct kho_vmalloc_chunk *chunk;
+ struct vm_struct *vm = find_vm_area(ptr);
+ unsigned int order, flags, nr_contig_pages;
+ unsigned int idx = 0;
+ int err;
+
+ if (!vm)
+ return -EINVAL;
+
+ if (vm->flags & ~KHO_VMALLOC_SUPPORTED_FLAGS)
+ return -EOPNOTSUPP;
+
+ flags = vmalloc_flags_to_kho(vm->flags);
+ order = get_vm_area_page_order(vm);
+
+ chunk = new_vmalloc_chunk(NULL);
+ if (!chunk)
+ return -ENOMEM;
+ KHOSER_STORE_PTR(preservation->first, chunk);
+
+ nr_contig_pages = (1 << order);
+ for (int i = 0; i < vm->nr_pages; i += nr_contig_pages) {
+ phys_addr_t phys = page_to_phys(vm->pages[i]);
+
+ err = kho_preserve_pages(vm->pages[i], nr_contig_pages);
+ if (err)
+ goto err_free;
+
+ chunk->phys[idx++] = phys;
+ if (idx == ARRAY_SIZE(chunk->phys)) {
+ chunk = new_vmalloc_chunk(chunk);
+ if (!chunk)
+ goto err_free;
+ idx = 0;
+ }
+ }
+
+ preservation->total_pages = vm->nr_pages;
+ preservation->flags = flags;
+ preservation->order = order;
+
+ return 0;
+
+err_free:
+ kho_vmalloc_free_chunks(preservation);
+ return err;
+}
+EXPORT_SYMBOL_GPL(kho_preserve_vmalloc);
+
+/**
+ * kho_restore_vmalloc - recreates and populates an area in vmalloc address
+ * space from the preserved memory.
+ * @preservation: preservation metadata.
+ *
+ * Recreates an area in vmalloc address space and populates it with memory that
+ * was preserved using kho_preserve_vmalloc().
+ *
+ * Return: pointer to the area in the vmalloc address space, NULL on failure.
+ */
+void *kho_restore_vmalloc(const struct kho_vmalloc *preservation)
+{
+ struct kho_vmalloc_chunk *chunk = KHOSER_LOAD_PTR(preservation->first);
+ unsigned int align, order, shift, vm_flags;
+ unsigned long total_pages, contig_pages;
+ unsigned long addr, size;
+ struct vm_struct *area;
+ struct page **pages;
+ unsigned int idx = 0;
+ int err;
+
+ vm_flags = kho_flags_to_vmalloc(preservation->flags);
+ if (vm_flags & ~KHO_VMALLOC_SUPPORTED_FLAGS)
+ return NULL;
+
+ total_pages = preservation->total_pages;
+ pages = kvmalloc_array(total_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+ order = preservation->order;
+ contig_pages = (1 << order);
+ shift = PAGE_SHIFT + order;
+ align = 1 << shift;
+
+ while (chunk) {
+ struct page *page;
+
+ for (int i = 0; chunk->phys[i]; i++) {
+ phys_addr_t phys = chunk->phys[i];
+
+ if (idx + contig_pages > total_pages)
+ goto err_free_pages_array;
+
+ page = kho_restore_pages(phys, contig_pages);
+ if (!page)
+ goto err_free_pages_array;
+
+ for (int j = 0; j < contig_pages; j++)
+ pages[idx++] = page;
+
+ phys += contig_pages * PAGE_SIZE;
+ }
+
+ page = kho_restore_pages(virt_to_phys(chunk), 1);
+ if (!page)
+ goto err_free_pages_array;
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ __free_page(page);
+ }
+
+ if (idx != total_pages)
+ goto err_free_pages_array;
+
+ area = __get_vm_area_node(total_pages * PAGE_SIZE, align, shift,
+ vm_flags, VMALLOC_START, VMALLOC_END,
+ NUMA_NO_NODE, GFP_KERNEL,
+ __builtin_return_address(0));
+ if (!area)
+ goto err_free_pages_array;
+
+ addr = (unsigned long)area->addr;
+ size = get_vm_area_size(area);
+ err = vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, shift);
+ if (err)
+ goto err_free_vm_area;
+
+ area->nr_pages = total_pages;
+ area->pages = pages;
+
+ return area->addr;
+
+err_free_vm_area:
+ free_vm_area(area);
+err_free_pages_array:
+ kvfree(pages);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
/* Handling for debug/kho/out */
@@ -952,6 +1263,26 @@ static const void *kho_get_fdt(void)
}
/**
+ * is_kho_boot - check if current kernel was booted via KHO-enabled
+ * kexec
+ *
+ * This function checks if the current kernel was loaded through a kexec
+ * operation with KHO enabled, by verifying that a valid KHO FDT
+ * was passed.
+ *
+ * Note: This function returns reliable results only after
+ * kho_populate() has been called during early boot. Before that,
+ * it may return false even if KHO data is present.
+ *
+ * Return: true if booted via KHO-enabled kexec, false otherwise
+ */
+bool is_kho_boot(void)
+{
+ return !!kho_get_fdt();
+}
+EXPORT_SYMBOL_GPL(is_kho_boot);
+
+/**
* kho_retrieve_subtree - retrieve a preserved sub FDT by its name.
* @name: the name of the sub FDT passed to kho_add_subtree().
* @phys: if found, the physical address of the sub FDT is stored in @phys.
@@ -1233,7 +1564,7 @@ int kho_fill_kimage(struct kimage *image)
int err = 0;
struct kexec_buf scratch;
- if (!kho_enable)
+ if (!kho_out.finalized)
return 0;
image->kho.fdt = page_to_phys(kho_out.ser.fdt);