diff options
Diffstat (limited to 'kernel/liveupdate/luo_core.c')
| -rw-r--r-- | kernel/liveupdate/luo_core.c | 450 |
1 files changed, 450 insertions, 0 deletions
diff --git a/kernel/liveupdate/luo_core.c b/kernel/liveupdate/luo_core.c new file mode 100644 index 000000000000..f7ecaf7740d1 --- /dev/null +++ b/kernel/liveupdate/luo_core.c @@ -0,0 +1,450 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (c) 2025, Google LLC. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +/** + * DOC: Live Update Orchestrator (LUO) + * + * Live Update is a specialized, kexec-based reboot process that allows a + * running kernel to be updated from one version to another while preserving + * the state of selected resources and keeping designated hardware devices + * operational. For these devices, DMA activity may continue throughout the + * kernel transition. + * + * While the primary use case driving this work is supporting live updates of + * the Linux kernel when it is used as a hypervisor in cloud environments, the + * LUO framework itself is designed to be workload-agnostic. Live Update + * facilitates a full kernel version upgrade for any type of system. + * + * For example, a non-hypervisor system running an in-memory cache like + * memcached with many gigabytes of data can use LUO. The userspace service + * can place its cache into a memfd, have its state preserved by LUO, and + * restore it immediately after the kernel kexec. + * + * Whether the system is running virtual machines, containers, a + * high-performance database, or networking services, LUO's primary goal is to + * enable a full kernel update by preserving critical userspace state and + * keeping essential devices operational. + * + * The core of LUO is a mechanism that tracks the progress of a live update, + * along with a callback API that allows other kernel subsystems to participate + * in the process. Example subsystems that can hook into LUO include: kvm, + * iommu, interrupts, vfio, participating filesystems, and memory management. + * + * LUO uses Kexec Handover to transfer memory state from the current kernel to + * the next kernel. For more details see + * Documentation/core-api/kho/concepts.rst. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/atomic.h> +#include <linux/errno.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/kexec_handover.h> +#include <linux/kho/abi/luo.h> +#include <linux/kobject.h> +#include <linux/libfdt.h> +#include <linux/liveupdate.h> +#include <linux/miscdevice.h> +#include <linux/mm.h> +#include <linux/sizes.h> +#include <linux/string.h> +#include <linux/unaligned.h> + +#include "kexec_handover_internal.h" +#include "luo_internal.h" + +static struct { + bool enabled; + void *fdt_out; + void *fdt_in; + u64 liveupdate_num; +} luo_global; + +static int __init early_liveupdate_param(char *buf) +{ + return kstrtobool(buf, &luo_global.enabled); +} +early_param("liveupdate", early_liveupdate_param); + +static int __init luo_early_startup(void) +{ + phys_addr_t fdt_phys; + int err, ln_size; + const void *ptr; + + if (!kho_is_enabled()) { + if (liveupdate_enabled()) + pr_warn("Disabling liveupdate because KHO is disabled\n"); + luo_global.enabled = false; + return 0; + } + + /* Retrieve LUO subtree, and verify its format. */ + err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys); + if (err) { + if (err != -ENOENT) { + pr_err("failed to retrieve FDT '%s' from KHO: %pe\n", + LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err)); + return err; + } + + return 0; + } + + luo_global.fdt_in = phys_to_virt(fdt_phys); + err = fdt_node_check_compatible(luo_global.fdt_in, 0, + LUO_FDT_COMPATIBLE); + if (err) { + pr_err("FDT '%s' is incompatible with '%s' [%d]\n", + LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err); + + return -EINVAL; + } + + ln_size = 0; + ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM, + &ln_size); + if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) { + pr_err("Unable to get live update number '%s' [%d]\n", + LUO_FDT_LIVEUPDATE_NUM, ln_size); + + return -EINVAL; + } + + luo_global.liveupdate_num = get_unaligned((u64 *)ptr); + pr_info("Retrieved live update data, liveupdate number: %lld\n", + luo_global.liveupdate_num); + + err = luo_session_setup_incoming(luo_global.fdt_in); + if (err) + return err; + + return 0; +} + +static int __init liveupdate_early_init(void) +{ + int err; + + err = luo_early_startup(); + if (err) { + luo_global.enabled = false; + luo_restore_fail("The incoming tree failed to initialize properly [%pe], disabling live update\n", + ERR_PTR(err)); + } + + return err; +} +early_initcall(liveupdate_early_init); + +/* Called during boot to create outgoing LUO fdt tree */ +static int __init luo_fdt_setup(void) +{ + const u64 ln = luo_global.liveupdate_num + 1; + void *fdt_out; + int err; + + fdt_out = kho_alloc_preserve(LUO_FDT_SIZE); + if (IS_ERR(fdt_out)) { + pr_err("failed to allocate/preserve FDT memory\n"); + return PTR_ERR(fdt_out); + } + + err = fdt_create(fdt_out, LUO_FDT_SIZE); + err |= fdt_finish_reservemap(fdt_out); + err |= fdt_begin_node(fdt_out, ""); + err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE); + err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln)); + err |= luo_session_setup_outgoing(fdt_out); + err |= fdt_end_node(fdt_out); + err |= fdt_finish(fdt_out); + if (err) + goto exit_free; + + err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out); + if (err) + goto exit_free; + luo_global.fdt_out = fdt_out; + + return 0; + +exit_free: + kho_unpreserve_free(fdt_out); + pr_err("failed to prepare LUO FDT: %d\n", err); + + return err; +} + +/* + * late initcall because it initializes the outgoing tree that is needed only + * once userspace starts using /dev/liveupdate. + */ +static int __init luo_late_startup(void) +{ + int err; + + if (!liveupdate_enabled()) + return 0; + + err = luo_fdt_setup(); + if (err) + luo_global.enabled = false; + + return err; +} +late_initcall(luo_late_startup); + +/* Public Functions */ + +/** + * liveupdate_reboot() - Kernel reboot notifier for live update final + * serialization. + * + * This function is invoked directly from the reboot() syscall pathway + * if kexec is in progress. + * + * If any callback fails, this function aborts KHO, undoes the freeze() + * callbacks, and returns an error. + */ +int liveupdate_reboot(void) +{ + int err; + + if (!liveupdate_enabled()) + return 0; + + err = luo_session_serialize(); + if (err) + return err; + + err = kho_finalize(); + if (err) { + pr_err("kho_finalize failed %d\n", err); + /* + * kho_finalize() may return libfdt errors, to aboid passing to + * userspace unknown errors, change this to EAGAIN. + */ + err = -EAGAIN; + } + + return err; +} + +/** + * liveupdate_enabled - Check if the live update feature is enabled. + * + * This function returns the state of the live update feature flag, which + * can be controlled via the ``liveupdate`` kernel command-line parameter. + * + * @return true if live update is enabled, false otherwise. + */ +bool liveupdate_enabled(void) +{ + return luo_global.enabled; +} + +/** + * DOC: LUO ioctl Interface + * + * The IOCTL user-space control interface for the LUO subsystem. + * It registers a character device, typically found at ``/dev/liveupdate``, + * which allows a userspace agent to manage the LUO state machine and its + * associated resources, such as preservable file descriptors. + * + * To ensure that the state machine is controlled by a single entity, access + * to this device is exclusive: only one process is permitted to have + * ``/dev/liveupdate`` open at any given time. Subsequent open attempts will + * fail with -EBUSY until the first process closes its file descriptor. + * This singleton model simplifies state management by preventing conflicting + * commands from multiple userspace agents. + */ + +struct luo_device_state { + struct miscdevice miscdev; + atomic_t in_use; +}; + +static int luo_ioctl_create_session(struct luo_ucmd *ucmd) +{ + struct liveupdate_ioctl_create_session *argp = ucmd->cmd; + struct file *file; + int err; + + argp->fd = get_unused_fd_flags(O_CLOEXEC); + if (argp->fd < 0) + return argp->fd; + + err = luo_session_create(argp->name, &file); + if (err) + goto err_put_fd; + + err = luo_ucmd_respond(ucmd, sizeof(*argp)); + if (err) + goto err_put_file; + + fd_install(argp->fd, file); + + return 0; + +err_put_file: + fput(file); +err_put_fd: + put_unused_fd(argp->fd); + + return err; +} + +static int luo_ioctl_retrieve_session(struct luo_ucmd *ucmd) +{ + struct liveupdate_ioctl_retrieve_session *argp = ucmd->cmd; + struct file *file; + int err; + + argp->fd = get_unused_fd_flags(O_CLOEXEC); + if (argp->fd < 0) + return argp->fd; + + err = luo_session_retrieve(argp->name, &file); + if (err < 0) + goto err_put_fd; + + err = luo_ucmd_respond(ucmd, sizeof(*argp)); + if (err) + goto err_put_file; + + fd_install(argp->fd, file); + + return 0; + +err_put_file: + fput(file); +err_put_fd: + put_unused_fd(argp->fd); + + return err; +} + +static int luo_open(struct inode *inodep, struct file *filep) +{ + struct luo_device_state *ldev = container_of(filep->private_data, + struct luo_device_state, + miscdev); + + if (atomic_cmpxchg(&ldev->in_use, 0, 1)) + return -EBUSY; + + /* Always return -EIO to user if deserialization fail */ + if (luo_session_deserialize()) { + atomic_set(&ldev->in_use, 0); + return -EIO; + } + + return 0; +} + +static int luo_release(struct inode *inodep, struct file *filep) +{ + struct luo_device_state *ldev = container_of(filep->private_data, + struct luo_device_state, + miscdev); + atomic_set(&ldev->in_use, 0); + + return 0; +} + +union ucmd_buffer { + struct liveupdate_ioctl_create_session create; + struct liveupdate_ioctl_retrieve_session retrieve; +}; + +struct luo_ioctl_op { + unsigned int size; + unsigned int min_size; + unsigned int ioctl_num; + int (*execute)(struct luo_ucmd *ucmd); +}; + +#define IOCTL_OP(_ioctl, _fn, _struct, _last) \ + [_IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE] = { \ + .size = sizeof(_struct) + \ + BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \ + sizeof(_struct)), \ + .min_size = offsetofend(_struct, _last), \ + .ioctl_num = _ioctl, \ + .execute = _fn, \ + } + +static const struct luo_ioctl_op luo_ioctl_ops[] = { + IOCTL_OP(LIVEUPDATE_IOCTL_CREATE_SESSION, luo_ioctl_create_session, + struct liveupdate_ioctl_create_session, name), + IOCTL_OP(LIVEUPDATE_IOCTL_RETRIEVE_SESSION, luo_ioctl_retrieve_session, + struct liveupdate_ioctl_retrieve_session, name), +}; + +static long luo_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + const struct luo_ioctl_op *op; + struct luo_ucmd ucmd = {}; + union ucmd_buffer buf; + unsigned int nr; + int err; + + nr = _IOC_NR(cmd); + if (nr < LIVEUPDATE_CMD_BASE || + (nr - LIVEUPDATE_CMD_BASE) >= ARRAY_SIZE(luo_ioctl_ops)) { + return -EINVAL; + } + + ucmd.ubuffer = (void __user *)arg; + err = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); + if (err) + return err; + + op = &luo_ioctl_ops[nr - LIVEUPDATE_CMD_BASE]; + if (op->ioctl_num != cmd) + return -ENOIOCTLCMD; + if (ucmd.user_size < op->min_size) + return -EINVAL; + + ucmd.cmd = &buf; + err = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer, + ucmd.user_size); + if (err) + return err; + + return op->execute(&ucmd); +} + +static const struct file_operations luo_fops = { + .owner = THIS_MODULE, + .open = luo_open, + .release = luo_release, + .unlocked_ioctl = luo_ioctl, +}; + +static struct luo_device_state luo_dev = { + .miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "liveupdate", + .fops = &luo_fops, + }, + .in_use = ATOMIC_INIT(0), +}; + +static int __init liveupdate_ioctl_init(void) +{ + if (!liveupdate_enabled()) + return 0; + + return misc_register(&luo_dev.miscdev); +} +late_initcall(liveupdate_ioctl_init); |
