Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/hugetlb.h | 5
-rw-r--r--  include/asm-generic/memory_model.h | 10
-rw-r--r--  include/asm-generic/pgalloc.h | 11
-rw-r--r--  include/asm-generic/syscall.h | 30
-rw-r--r--  include/dt-bindings/arm/qcom,ids.h | 1
-rw-r--r--  include/dt-bindings/clock/stm32h7-clks.h | 4
-rw-r--r--  include/dt-bindings/reset/thead,th1520-reset.h | 16
-rw-r--r--  include/linux/bus/stm32_firewall_device.h | 15
-rw-r--r--  include/linux/compiler_types.h | 8
-rw-r--r--  include/linux/cpuset.h | 9
-rw-r--r--  include/linux/crash_core.h | 7
-rw-r--r--  include/linux/crash_dump.h | 2
-rw-r--r--  include/linux/damon.h | 6
-rw-r--r--  include/linux/firmware/imx/sm.h | 19
-rw-r--r--  include/linux/firmware/samsung/exynos-acpm-protocol.h | 6
-rw-r--r--  include/linux/fs.h | 26
-rw-r--r--  include/linux/gfp.h | 8
-rw-r--r--  include/linux/habanalabs/hl_boot_if.h | 2
-rw-r--r--  include/linux/huge_mm.h | 17
-rw-r--r--  include/linux/hugetlb.h | 15
-rw-r--r--  include/linux/hung_task.h | 99
-rw-r--r--  include/linux/kernel.h | 14
-rw-r--r--  include/linux/kexec.h | 39
-rw-r--r--  include/linux/kexec_handover.h | 109
-rw-r--r--  include/linux/khugepaged.h | 8
-rw-r--r--  include/linux/list.h | 8
-rw-r--r--  include/linux/llist.h | 23
-rw-r--r--  include/linux/maple_tree.h | 4
-rw-r--r--  include/linux/memblock.h | 41
-rw-r--r--  include/linux/memcontrol.h | 55
-rw-r--r--  include/linux/memory.h | 10
-rw-r--r--  include/linux/mempolicy.h | 4
-rw-r--r--  include/linux/mm.h | 357
-rw-r--r--  include/linux/mm_inline.h | 2
-rw-r--r--  include/linux/mm_types.h | 36
-rw-r--r--  include/linux/mmap_lock.h | 227
-rw-r--r--  include/linux/mmzone.h | 88
-rw-r--r--  include/linux/mount.h | 3
-rw-r--r--  include/linux/numa_memblks.h | 1
-rw-r--r--  include/linux/oid_registry.h | 1
-rw-r--r--  include/linux/overflow.h | 25
-rw-r--r--  include/linux/page-flags-layout.h | 4
-rw-r--r--  include/linux/page-flags.h | 21
-rw-r--r--  include/linux/pagemap.h | 91
-rw-r--r--  include/linux/percpu-defs.h | 2
-rw-r--r--  include/linux/pgtable.h | 131
-rw-r--r--  include/linux/ptdump.h | 15
-rw-r--r--  include/linux/relay.h | 3
-rw-r--r--  include/linux/reset.h | 6
-rw-r--r--  include/linux/rio_drv.h | 5
-rw-r--r--  include/linux/rmap.h | 2
-rw-r--r--  include/linux/scatterlist.h | 23
-rw-r--r--  include/linux/sched.h | 6
-rw-r--r--  include/linux/sched/task_stack.h | 2
-rw-r--r--  include/linux/scmi_imx_protocol.h | 42
-rw-r--r--  include/linux/semaphore.h | 15
-rw-r--r--  include/linux/soc/qcom/llcc-qcom.h | 8
-rw-r--r--  include/linux/soc/samsung/exynos-regs-pmu.h | 11
-rw-r--r--  include/linux/sort.h | 10
-rw-r--r--  include/linux/swap.h | 12
-rw-r--r--  include/linux/turris-signing-key.h | 35
-rw-r--r--  include/linux/types.h | 4
-rw-r--r--  include/linux/unroll.h | 4
-rw-r--r--  include/linux/uprobes.h | 6
-rw-r--r--  include/linux/util_macros.h | 69
-rw-r--r--  include/linux/xarray.h | 24
-rw-r--r--  include/linux/zpool.h | 4
-rw-r--r--  include/linux/zsmalloc.h | 3
-rw-r--r--  include/soc/qcom/qcom-spmi-pmic.h | 2
-rw-r--r--  include/trace/events/huge_memory.h | 12
-rw-r--r--  include/trace/events/mmflags.h | 4
-rw-r--r--  include/trace/events/sched.h | 33
-rw-r--r--  include/uapi/linux/fs.h | 1
-rw-r--r--  include/uapi/linux/ptrace.h | 7
-rw-r--r--  include/video/mach64.h | 3
75 files changed, 1403 insertions, 588 deletions
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 2afc95bf1655..3e0a8fe9b108 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -5,11 +5,6 @@
#include <linux/swap.h>
#include <linux/swapops.h>
-static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
-{
- return mk_pte(page, pgprot);
-}
-
static inline unsigned long huge_pte_write(pte_t pte)
{
return pte_write(pte);
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index a3b5029aebbd..74d0077cc5fa 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -30,7 +30,15 @@ static inline int pfn_valid(unsigned long pfn)
return pfn >= pfn_offset && (pfn - pfn_offset) < max_mapnr;
}
#define pfn_valid pfn_valid
-#endif
+
+#ifndef for_each_valid_pfn
+#define for_each_valid_pfn(pfn, start_pfn, end_pfn) \
+ for ((pfn) = max_t(unsigned long, (start_pfn), ARCH_PFN_OFFSET); \
+ (pfn) < min_t(unsigned long, (end_pfn), \
+ ARCH_PFN_OFFSET + max_mapnr); \
+ (pfn)++)
+#endif /* for_each_valid_pfn */
+#endif /* pfn_valid */
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
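
For illustration, a minimal sketch of how a pfn walker might use the new helper on FLATMEM (touch_pfn() is a hypothetical per-pfn operation; start_pfn/end_pfn are assumed inputs):

	unsigned long pfn;

	/* Visits only pfns inside [ARCH_PFN_OFFSET, ARCH_PFN_OFFSET + max_mapnr). */
	for_each_valid_pfn(pfn, start_pfn, end_pfn)
		touch_pfn(pfn);		/* hypothetical per-pfn work */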
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 892ece4558a2..3c8ec3bfea44 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -23,6 +23,11 @@ static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm)
if (!ptdesc)
return NULL;
+ if (!pagetable_pte_ctor(mm, ptdesc)) {
+ pagetable_free(ptdesc);
+ return NULL;
+ }
+
return ptdesc_address(ptdesc);
}
#define __pte_alloc_one_kernel(...) alloc_hooks(__pte_alloc_one_kernel_noprof(__VA_ARGS__))
@@ -48,7 +53,7 @@ static inline pte_t *pte_alloc_one_kernel_noprof(struct mm_struct *mm)
*/
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
- pagetable_free(virt_to_ptdesc(pte));
+ pagetable_dtor_free(virt_to_ptdesc(pte));
}
/**
@@ -70,7 +75,7 @@ static inline pgtable_t __pte_alloc_one_noprof(struct mm_struct *mm, gfp_t gfp)
ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
- if (!pagetable_pte_ctor(ptdesc)) {
+ if (!pagetable_pte_ctor(mm, ptdesc)) {
pagetable_free(ptdesc);
return NULL;
}
@@ -137,7 +142,7 @@ static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long ad
ptdesc = pagetable_alloc_noprof(gfp, 0);
if (!ptdesc)
return NULL;
- if (!pagetable_pmd_ctor(ptdesc)) {
+ if (!pagetable_pmd_ctor(mm, ptdesc)) {
pagetable_free(ptdesc);
return NULL;
}
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 182b039ce5fa..c5a3ad53beec 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -38,6 +38,20 @@ struct pt_regs;
int syscall_get_nr(struct task_struct *task, struct pt_regs *regs);
/**
+ * syscall_set_nr - change the system call a task is executing
+ * @task: task of interest, must be blocked
+ * @regs: task_pt_regs() of @task
+ * @nr: system call number
+ *
+ * Changes the system call number @task is about to execute.
+ *
+ * It's only valid to call this when @task is stopped for tracing on
+ * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
+ */
+void syscall_set_nr(struct task_struct *task, struct pt_regs *regs, int nr);
+
+/**
* syscall_rollback - roll back registers after an aborted system call
* @task: task of interest, must be in system call exit tracing
* @regs: task_pt_regs() of @task
@@ -118,6 +132,22 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
unsigned long *args);
/**
+ * syscall_set_arguments - change system call parameter value
+ * @task: task of interest, must be in system call entry tracing
+ * @regs: task_pt_regs() of @task
+ * @args: array of argument values to store
+ *
+ * Changes 6 arguments to the system call.
+ * The first argument gets value @args[0], and so on.
+ *
+ * It's only valid to call this when @task is stopped for tracing on
+ * entry to a system call, due to %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_AUDIT.
+ */
+void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
+ const unsigned long *args);
+
+/**
* syscall_get_arch - return the AUDIT_ARCH for the current system call
* @task: task of interest, must be blocked
*
diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h
index 1b3e0176dcb7..897b8135dc12 100644
--- a/include/dt-bindings/arm/qcom,ids.h
+++ b/include/dt-bindings/arm/qcom,ids.h
@@ -277,6 +277,7 @@
#define QCOM_ID_IPQ5302 595
#define QCOM_ID_QCS8550 603
#define QCOM_ID_QCM8550 604
+#define QCOM_ID_SM8750 618
#define QCOM_ID_IPQ5300 624
#define QCOM_ID_IPQ5321 650
#define QCOM_ID_IPQ5424 651
diff --git a/include/dt-bindings/clock/stm32h7-clks.h b/include/dt-bindings/clock/stm32h7-clks.h
index 6637272b3242..330b39c2c303 100644
--- a/include/dt-bindings/clock/stm32h7-clks.h
+++ b/include/dt-bindings/clock/stm32h7-clks.h
@@ -126,8 +126,8 @@
#define ADC3_CK 128
#define DSI_CK 129
#define LTDC_CK 130
-#define USART8_CK 131
-#define USART7_CK 132
+#define UART8_CK 131
+#define UART7_CK 132
#define HDMICEC_CK 133
#define I2C3_CK 134
#define I2C2_CK 135
diff --git a/include/dt-bindings/reset/thead,th1520-reset.h b/include/dt-bindings/reset/thead,th1520-reset.h
new file mode 100644
index 000000000000..00459f160489
--- /dev/null
+++ b/include/dt-bindings/reset/thead,th1520-reset.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2024 Samsung Electronics Co., Ltd.
+ * Author: Michal Wilczynski <m.wilczynski@samsung.com>
+ */
+
+#ifndef _DT_BINDINGS_TH1520_RESET_H
+#define _DT_BINDINGS_TH1520_RESET_H
+
+#define TH1520_RESET_ID_GPU 0
+#define TH1520_RESET_ID_GPU_CLKGEN 1
+#define TH1520_RESET_ID_NPU 2
+#define TH1520_RESET_ID_WDT0 3
+#define TH1520_RESET_ID_WDT1 4
+
+#endif /* _DT_BINDINGS_TH1520_RESET_H */
diff --git a/include/linux/bus/stm32_firewall_device.h b/include/linux/bus/stm32_firewall_device.h
index 5178b72bc920..eaa7a3f54450 100644
--- a/include/linux/bus/stm32_firewall_device.h
+++ b/include/linux/bus/stm32_firewall_device.h
@@ -114,27 +114,30 @@ void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 su
#else /* CONFIG_STM32_FIREWALL */
-int stm32_firewall_get_firewall(struct device_node *np, struct stm32_firewall *firewall,
- unsigned int nb_firewall)
+static inline int stm32_firewall_get_firewall(struct device_node *np,
+ struct stm32_firewall *firewall,
+ unsigned int nb_firewall)
{
return -ENODEV;
}
-int stm32_firewall_grant_access(struct stm32_firewall *firewall)
+static inline int stm32_firewall_grant_access(struct stm32_firewall *firewall)
{
return -ENODEV;
}
-void stm32_firewall_release_access(struct stm32_firewall *firewall)
+static inline void stm32_firewall_release_access(struct stm32_firewall *firewall)
{
}
-int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id)
+static inline int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall,
+ u32 subsystem_id)
{
return -ENODEV;
}
-void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id)
+static inline void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall,
+ u32 subsystem_id)
{
}
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 20881cc761fa..2b77d12e07b2 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -530,6 +530,12 @@ struct ftrace_likely_data {
sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
#ifdef __OPTIMIZE__
+/*
+ * #ifdef __OPTIMIZE__ is only a good approximation; for instance "make
+ * CFLAGS_foo.o=-Og" defines __OPTIMIZE__, does not elide the conditional code
+ * and can break compilation with wrong error message(s). Combine with
+ * -U__OPTIMIZE__ when needed.
+ */
# define __compiletime_assert(condition, msg, prefix, suffix) \
do { \
/* \
@@ -543,7 +549,7 @@ struct ftrace_likely_data {
prefix ## suffix(); \
} while (0)
#else
-# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
+# define __compiletime_assert(condition, msg, prefix, suffix) ((void)(condition))
#endif
#define _compiletime_assert(condition, msg, prefix, suffix) \
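
A minimal sketch of why the disabled form now evaluates its condition (macro names here are illustrative, not the kernel's): with the old empty expansion, a variable referenced only inside the assertion becomes unused when optimization is off.

	#define assert_old(cond)	do { } while (0)	/* 'cond' vanishes */
	#define assert_new(cond)	((void)(cond))		/* 'cond' stays used */

	static int demo(void)
	{
		const int limit = 8;	/* referenced only in the assertion */

		assert_new(limit > 0);	/* keeps 'limit' used at -O0; no warning */
		return 0;
	}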
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 5466c96a33db..2ddb256187b5 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -82,11 +82,11 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
void cpuset_init_current_mems_allowed(void);
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
-extern bool cpuset_node_allowed(int node, gfp_t gfp_mask);
+extern bool cpuset_current_node_allowed(int node, gfp_t gfp_mask);
static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
- return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+ return cpuset_current_node_allowed(zone_to_nid(z), gfp_mask);
}
static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
@@ -173,6 +173,7 @@ static inline void set_mems_allowed(nodemask_t nodemask)
task_unlock(current);
}
+extern bool cpuset_node_allowed(struct cgroup *cgroup, int nid);
#else /* !CONFIG_CPUSETS */
static inline bool cpusets_enabled(void) { return false; }
@@ -293,6 +294,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
return false;
}
+static inline bool cpuset_node_allowed(struct cgroup *cgroup, int nid)
+{
+ return true;
+}
#endif /* !CONFIG_CPUSETS */
#endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 44305336314e..d35726d6a415 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -34,7 +34,12 @@ static inline void arch_kexec_protect_crashkres(void) { }
static inline void arch_kexec_unprotect_crashkres(void) { }
#endif
-
+#ifdef CONFIG_CRASH_DM_CRYPT
+int crash_load_dm_crypt_keys(struct kimage *image);
+ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos);
+#else
+static inline int crash_load_dm_crypt_keys(struct kimage *image) { return 0; }
+#endif
#ifndef arch_crash_handle_hotplug_event
static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { }
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 2f2555e6407c..dd6fc3b2133b 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -15,6 +15,8 @@
extern unsigned long long elfcorehdr_addr;
extern unsigned long long elfcorehdr_size;
+extern unsigned long long dm_crypt_keys_addr;
+
#ifdef CONFIG_CRASH_DUMP
extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size);
extern void elfcorehdr_free(unsigned long long addr);
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 47e36e6ea203..a4011726cb3b 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -145,6 +145,8 @@ enum damos_action {
*
* @DAMOS_QUOTA_USER_INPUT: User-input value.
* @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us.
+ * @DAMOS_QUOTA_NODE_MEM_USED_BP: MemUsed ratio of a node.
+ * @DAMOS_QUOTA_NODE_MEM_FREE_BP: MemFree ratio of a node.
* @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics.
*
 * Metrics equal to or larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported.
@@ -152,6 +154,8 @@ enum damos_action {
enum damos_quota_goal_metric {
DAMOS_QUOTA_USER_INPUT,
DAMOS_QUOTA_SOME_MEM_PSI_US,
+ DAMOS_QUOTA_NODE_MEM_USED_BP,
+ DAMOS_QUOTA_NODE_MEM_FREE_BP,
NR_DAMOS_QUOTA_GOAL_METRICS,
};
@@ -161,6 +165,7 @@ enum damos_quota_goal_metric {
* @target_value: Target value of @metric to achieve with the tuning.
* @current_value: Current value of @metric.
* @last_psi_total: Last measured total PSI
+ * @nid: Node id.
* @list: List head for siblings.
*
* Data structure for getting the current score of the quota tuning goal. The
@@ -179,6 +184,7 @@ struct damos_quota_goal {
/* metric-dependent fields */
union {
u64 last_psi_total;
+ int nid;
};
struct list_head list;
};
diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h
index 9b85a3f028d1..a8a17eeb7d90 100644
--- a/include/linux/firmware/imx/sm.h
+++ b/include/linux/firmware/imx/sm.h
@@ -8,6 +8,7 @@
#include <linux/bitfield.h>
#include <linux/errno.h>
+#include <linux/scmi_imx_protocol.h>
#include <linux/types.h>
#define SCMI_IMX_CTRL_PDM_CLK_SEL 0 /* AON PDM clock sel */
@@ -20,4 +21,22 @@
int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val);
int scmi_imx_misc_ctrl_set(u32 id, u32 val);
+int scmi_imx_cpu_start(u32 cpuid, bool start);
+int scmi_imx_cpu_started(u32 cpuid, bool *started);
+int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, bool boot,
+ bool resume);
+
+enum scmi_imx_lmm_op {
+ SCMI_IMX_LMM_BOOT,
+ SCMI_IMX_LMM_POWER_ON,
+ SCMI_IMX_LMM_SHUTDOWN,
+};
+
+/* For shutdown operation */
+#define SCMI_IMX_LMM_OP_FORCEFUL 0
+#define SCMI_IMX_LMM_OP_GRACEFUL BIT(0)
+
+int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags);
+int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info);
+int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector);
#endif
diff --git a/include/linux/firmware/samsung/exynos-acpm-protocol.h b/include/linux/firmware/samsung/exynos-acpm-protocol.h
index 76255b5d06b1..f628bf1862c2 100644
--- a/include/linux/firmware/samsung/exynos-acpm-protocol.h
+++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h
@@ -11,6 +11,7 @@
#include <linux/types.h>
struct acpm_handle;
+struct device_node;
struct acpm_pmic_ops {
int (*read_reg)(const struct acpm_handle *handle,
@@ -44,6 +45,7 @@ struct acpm_handle {
struct device;
-const struct acpm_handle *devm_acpm_get_by_phandle(struct device *dev,
- const char *property);
+const struct acpm_handle *devm_acpm_get_by_node(struct device *dev,
+ struct device_node *np);
+
#endif /* __EXYNOS_ACPM_PROTOCOL_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0db87f8e676c..da86fcb11882 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1250,7 +1250,6 @@ extern int send_sigurg(struct file *file);
/* These sb flags are internal to the kernel */
#define SB_DEAD BIT(21)
#define SB_DYING BIT(24)
-#define SB_SUBMOUNT BIT(26)
#define SB_FORCE BIT(27)
#define SB_NOSEC BIT(28)
#define SB_BORN BIT(29)
@@ -2190,6 +2189,7 @@ struct file_operations {
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
unsigned int poll_flags);
+ int (*mmap_prepare)(struct vm_area_desc *);
} __randomize_layout;
/* Supports async buffered reads */
@@ -2259,11 +2259,35 @@ struct inode_operations {
struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;
+/* Did the driver provide valid mmap hook configuration? */
+static inline bool file_has_valid_mmap_hooks(struct file *file)
+{
+ bool has_mmap = file->f_op->mmap;
+ bool has_mmap_prepare = file->f_op->mmap_prepare;
+
+ /* Hooks are mutually exclusive. */
+ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare))
+ return false;
+ if (!has_mmap && !has_mmap_prepare)
+ return false;
+
+ return true;
+}
+
static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
{
+ if (WARN_ON_ONCE(file->f_op->mmap_prepare))
+ return -EINVAL;
+
return file->f_op->mmap(file, vma);
}
+static inline int __call_mmap_prepare(struct file *file,
+ struct vm_area_desc *desc)
+{
+ return file->f_op->mmap_prepare(desc);
+}
+
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
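
A minimal sketch of a driver opting into the new hook (the driver and its names are hypothetical); as file_has_valid_mmap_hooks() above enforces, .mmap and .mmap_prepare are mutually exclusive:

	static int mydrv_mmap_prepare(struct vm_area_desc *desc)
	{
		/* Validate or adjust the mapping before the VMA is finalized. */
		return 0;
	}

	static const struct file_operations mydrv_fops = {
		.owner		= THIS_MODULE,
		.mmap_prepare	= mydrv_mmap_prepare,	/* must not also set .mmap */
	};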
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c9fa6309c903..be160e8d8bcb 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -45,13 +45,13 @@ static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags)
 * !__GFP_DIRECT_RECLAIM -> direct reclaim is not allowed.
* !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd.
* All GFP_* flags including GFP_NOWAIT use one or both flags.
- * try_alloc_pages() is the only API that doesn't specify either flag.
+ * alloc_pages_nolock() is the only API that doesn't specify either flag.
*
* This is stronger than GFP_NOWAIT or GFP_ATOMIC because
* those are guaranteed to never block on a sleeping lock.
* Here we are enforcing that the allocation doesn't ever spin
* on any locks (i.e. only trylocks). There is no high level
- * GFP_$FOO flag for this use in try_alloc_pages() as the
+ * GFP_$FOO flag for this use in alloc_pages_nolock() as the
* regular page allocator doesn't fully support this
* allocation mode.
*/
@@ -354,8 +354,8 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp,
}
#define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__))
-struct page *try_alloc_pages_noprof(int nid, unsigned int order);
-#define try_alloc_pages(...) alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__))
+struct page *alloc_pages_nolock_noprof(int nid, unsigned int order);
+#define alloc_pages_nolock(...) alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__))
extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order);
#define __get_free_pages(...) alloc_hooks(get_free_pages_noprof(__VA_ARGS__))
diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h
index d2a9fc96424b..af5fb4ad77eb 100644
--- a/include/linux/habanalabs/hl_boot_if.h
+++ b/include/linux/habanalabs/hl_boot_if.h
@@ -295,7 +295,7 @@ enum cpu_boot_dev_sts {
* Initialized in: linux
*
* CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from
- * previleged entity. FW sets this status
+ * privileged entity. FW sets this status
* bit for host. If this bit is set then
* GIC can not be accessed from host.
* Initialized in: linux
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e893d546a49f..2f190c90192d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -395,7 +395,7 @@ static inline int split_huge_page(struct page *page)
void deferred_split_folio(struct folio *folio, bool partially_mapped);
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long address, bool freeze, struct folio *folio);
+ unsigned long address, bool freeze);
#define split_huge_pmd(__vma, __pmd, __address) \
do { \
@@ -403,12 +403,11 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \
|| pmd_devmap(*____pmd)) \
__split_huge_pmd(__vma, __pmd, __address, \
- false, NULL); \
+ false); \
} while (0)
-
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
- bool freeze, struct folio *folio);
+ bool freeze);
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
unsigned long address);
@@ -495,15 +494,13 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
void mm_put_huge_zero_folio(struct mm_struct *mm);
-#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))
-
static inline bool thp_migration_supported(void)
{
return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, bool freeze, struct folio *folio);
+ pmd_t *pmd, bool freeze);
bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmdp, struct folio *folio);
@@ -578,12 +575,12 @@ static inline void deferred_split_folio(struct folio *folio, bool partially_mapp
do { } while (0)
static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long address, bool freeze, struct folio *folio) {}
+ unsigned long address, bool freeze) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
- unsigned long address, bool freeze, struct folio *folio) {}
+ unsigned long address, bool freeze) {}
static inline void split_huge_pmd_locked(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
- bool freeze, struct folio *folio) {}
+ bool freeze) {}
static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 4861a7e304bb..0598f36931de 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,7 @@
#include <linux/pgtable.h>
#include <linux/gfp.h>
#include <linux/userfaultfd_k.h>
+#include <linux/nodemask.h>
struct ctl_table;
struct user_struct;
@@ -128,12 +129,12 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
struct vm_area_struct *, struct vm_area_struct *);
void unmap_hugepage_range(struct vm_area_struct *,
- unsigned long, unsigned long, struct page *,
- zap_flags_t);
+ unsigned long start, unsigned long end,
+ struct folio *, zap_flags_t);
void __unmap_hugepage_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- struct page *ref_page, zap_flags_t zap_flags);
+ struct folio *, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
void hugetlb_show_meminfo_node(int nid);
@@ -176,6 +177,8 @@ extern struct list_head huge_boot_pages[MAX_NUMNODES];
void hugetlb_bootmem_alloc(void);
bool hugetlb_bootmem_allocated(void);
+extern nodemask_t hugetlb_bootmem_nodes;
+void hugetlb_bootmem_set_nodes(void);
/* arch callbacks */
@@ -453,7 +456,7 @@ static inline long hugetlb_change_protection(
static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
- unsigned long end, struct page *ref_page,
+ unsigned long end, struct folio *folio,
zap_flags_t zap_flags)
{
BUG();
@@ -700,7 +703,7 @@ struct huge_bootmem_page {
bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
-int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
+int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
@@ -1088,7 +1091,7 @@ static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
return NULL;
}
-static inline int isolate_or_dissolve_huge_page(struct page *page,
+static inline int isolate_or_dissolve_huge_folio(struct folio *folio,
struct list_head *list)
{
return -ENOMEM;
diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h
new file mode 100644
index 000000000000..1bc2b3244613
--- /dev/null
+++ b/include/linux/hung_task.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Detect Hung Task: detecting tasks stuck in D state
+ *
+ * Copyright (C) 2025 Tongcheng Travel (www.ly.com)
+ * Author: Lance Yang <mingzhe.yang@ly.com>
+ */
+#ifndef __LINUX_HUNG_TASK_H
+#define __LINUX_HUNG_TASK_H
+
+#include <linux/bug.h>
+#include <linux/sched.h>
+#include <linux/compiler.h>
+
+/*
+ * @blocker: Combines lock address and blocking type.
+ *
+ * Lock pointers are at least 4-byte aligned (32-bit) or 8-byte aligned
+ * (64-bit), which leaves the two least significant bits (LSBs) of the
+ * pointer always zero. We can therefore use these bits to encode the
+ * specific blocking type.
+ *
+ * Type encoding:
+ * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX)
+ * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM)
+ * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX)
+ * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM)
+ */
+#define BLOCKER_TYPE_MUTEX 0x00UL
+#define BLOCKER_TYPE_SEM 0x01UL
+#define BLOCKER_TYPE_RTMUTEX 0x02UL
+#define BLOCKER_TYPE_RWSEM 0x03UL
+
+#define BLOCKER_TYPE_MASK 0x03UL
+
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+static inline void hung_task_set_blocker(void *lock, unsigned long type)
+{
+ unsigned long lock_ptr = (unsigned long)lock;
+
+ WARN_ON_ONCE(!lock_ptr);
+ WARN_ON_ONCE(READ_ONCE(current->blocker));
+
+ /*
+ * If the lock pointer overlaps BLOCKER_TYPE_MASK (i.e. it is not
+ * sufficiently aligned), return without writing anything.
+ */
+ if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK))
+ return;
+
+ WRITE_ONCE(current->blocker, lock_ptr | type);
+}
+
+static inline void hung_task_clear_blocker(void)
+{
+ WARN_ON_ONCE(!READ_ONCE(current->blocker));
+
+ WRITE_ONCE(current->blocker, 0UL);
+}
+
+/*
+ * hung_task_get_blocker_type - Extracts blocker type from encoded blocker
+ * address.
+ *
+ * @blocker: Blocker pointer with encoded type (via LSB bits)
+ *
+ * Returns: BLOCKER_TYPE_MUTEX, BLOCKER_TYPE_SEM, etc.
+ */
+static inline unsigned long hung_task_get_blocker_type(unsigned long blocker)
+{
+ WARN_ON_ONCE(!blocker);
+
+ return blocker & BLOCKER_TYPE_MASK;
+}
+
+static inline void *hung_task_blocker_to_lock(unsigned long blocker)
+{
+ WARN_ON_ONCE(!blocker);
+
+ return (void *)(blocker & ~BLOCKER_TYPE_MASK);
+}
+#else
+static inline void hung_task_set_blocker(void *lock, unsigned long type)
+{
+}
+static inline void hung_task_clear_blocker(void)
+{
+}
+static inline unsigned long hung_task_get_blocker_type(unsigned long blocker)
+{
+ return 0UL;
+}
+static inline void *hung_task_blocker_to_lock(unsigned long blocker)
+{
+ return NULL;
+}
+#endif
+
+#endif /* __LINUX_HUNG_TASK_H */
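
A minimal sketch of both sides of the protocol (illustrative only; in practice the set/clear calls live in the lock slowpaths, and the detector walks blocked tasks):

	/* Blocked side: publish the lock before sleeping on it. */
	hung_task_set_blocker(sem, BLOCKER_TYPE_SEM);
	down(sem);				/* may sit in D state here */
	hung_task_clear_blocker();

	/* Detector side: recover the lock address and type from one word. */
	unsigned long blocker = READ_ONCE(task->blocker);

	if (blocker) {
		void *lock = hung_task_blocker_to_lock(blocker);
		unsigned long type = hung_task_get_blocker_type(blocker);
		/* report 'lock' according to 'type' */
	}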
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index be2e8c0a187e..1cce1f6410a9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@
#include <linux/sprintf.h>
#include <linux/static_call_types.h>
#include <linux/instruction_pointer.h>
+#include <linux/util_macros.h>
#include <linux/wordpart.h>
#include <asm/byteorder.h>
@@ -41,19 +42,6 @@
#define STACK_MAGIC 0xdeadbeef
-/* generic data direction definitions */
-#define READ 0
-#define WRITE 1
-
-#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL)
-
-#define u64_to_user_ptr(x) ( \
-{ \
- typecheck(u64, (x)); \
- (void __user *)(uintptr_t)(x); \
-} \
-)
-
struct completion;
struct user;
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 53ef1b6c8712..03f85ad03025 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -25,6 +25,10 @@
extern note_buf_t __percpu *crash_notes;
+#ifdef CONFIG_CRASH_DUMP
+#include <linux/prandom.h>
+#endif
+
#ifdef CONFIG_KEXEC_CORE
#include <linux/list.h>
#include <linux/compat.h>
@@ -169,6 +173,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image);
* @buf_min: The buffer can't be placed below this address.
* @buf_max: The buffer can't be placed above this address.
* @top_down: Allocate from top of memory.
+ * @random: Place the buffer at a random position.
*/
struct kexec_buf {
struct kimage *image;
@@ -180,8 +185,33 @@ struct kexec_buf {
unsigned long buf_min;
unsigned long buf_max;
bool top_down;
+#ifdef CONFIG_CRASH_DUMP
+ bool random;
+#endif
};
+
+#ifdef CONFIG_CRASH_DUMP
+static inline void kexec_random_range_start(unsigned long start,
+ unsigned long end,
+ struct kexec_buf *kbuf,
+ unsigned long *temp_start)
+{
+ unsigned short i;
+
+ if (kbuf->random) {
+ get_random_bytes(&i, sizeof(unsigned short));
+ *temp_start = start + (end - start) / USHRT_MAX * i;
+ }
+}
+#else
+static inline void kexec_random_range_start(unsigned long start,
+ unsigned long end,
+ struct kexec_buf *kbuf,
+ unsigned long *temp_start)
+{}
+#endif
+
int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf);
int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
void *buf, unsigned int size,
@@ -374,10 +404,19 @@ struct kimage {
bool is_ima_segment_index_set;
#endif
+ struct {
+ struct kexec_segment *scratch;
+ phys_addr_t fdt;
+ } kho;
+
/* Core ELF header buffer */
void *elf_headers;
unsigned long elf_headers_sz;
unsigned long elf_load_addr;
+
+ /* dm crypt keys buffer */
+ unsigned long dm_crypt_keys_addr;
+ unsigned long dm_crypt_keys_sz;
};
/* kexec interface functions */
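
The randomized placement maps a 16-bit random value onto evenly spaced candidate offsets; as a worked sketch of the arithmetic above:

	/* With i uniform in [0, USHRT_MAX], the candidate base becomes one of
	 * ~65536 evenly spaced positions in [start, end). */
	unsigned long step = (end - start) / USHRT_MAX;	/* placement granularity */
	unsigned long pos = start + step * i;		/* i from get_random_bytes() */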
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
new file mode 100644
index 000000000000..348844cffb13
--- /dev/null
+++ b/include/linux/kexec_handover.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_KEXEC_HANDOVER_H
+#define LINUX_KEXEC_HANDOVER_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+
+struct kho_scratch {
+ phys_addr_t addr;
+ phys_addr_t size;
+};
+
+/* KHO Notifier index */
+enum kho_event {
+ KEXEC_KHO_FINALIZE = 0,
+ KEXEC_KHO_ABORT = 1,
+};
+
+struct folio;
+struct notifier_block;
+
+#define DECLARE_KHOSER_PTR(name, type) \
+ union { \
+ phys_addr_t phys; \
+ type ptr; \
+ } name
+#define KHOSER_STORE_PTR(dest, val) \
+ ({ \
+ typeof(val) v = val; \
+ typecheck(typeof((dest).ptr), v); \
+ (dest).phys = virt_to_phys(v); \
+ })
+#define KHOSER_LOAD_PTR(src) \
+ ({ \
+ typeof(src) s = src; \
+ (typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \
+ })
+
+struct kho_serialization;
+
+#ifdef CONFIG_KEXEC_HANDOVER
+bool kho_is_enabled(void);
+
+int kho_preserve_folio(struct folio *folio);
+int kho_preserve_phys(phys_addr_t phys, size_t size);
+struct folio *kho_restore_folio(phys_addr_t phys);
+int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
+int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
+
+int register_kho_notifier(struct notifier_block *nb);
+int unregister_kho_notifier(struct notifier_block *nb);
+
+void kho_memory_init(void);
+
+void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys,
+ u64 scratch_len);
+#else
+static inline bool kho_is_enabled(void)
+{
+ return false;
+}
+
+static inline int kho_preserve_folio(struct folio *folio)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct folio *kho_restore_folio(phys_addr_t phys)
+{
+ return NULL;
+}
+
+static inline int kho_add_subtree(struct kho_serialization *ser,
+ const char *name, void *fdt)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int register_kho_notifier(struct notifier_block *nb)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int unregister_kho_notifier(struct notifier_block *nb)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void kho_memory_init(void)
+{
+}
+
+static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
+ phys_addr_t scratch_phys, u64 scratch_len)
+{
+}
+#endif /* CONFIG_KEXEC_HANDOVER */
+
+#endif /* LINUX_KEXEC_HANDOVER_H */
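
The KHOSER helpers let preserved structures link to each other by physical address, so the next kernel can relink them after kexec. A minimal sketch (the struct and field names are hypothetical):

	struct kho_chain {
		DECLARE_KHOSER_PTR(next, struct kho_chain *);
	};

	static void chain_link(struct kho_chain *node, struct kho_chain *succ)
	{
		KHOSER_STORE_PTR(node->next, succ);	/* records virt_to_phys(succ) */
	}

	static struct kho_chain *chain_next(struct kho_chain *node)
	{
		return KHOSER_LOAD_PTR(node->next);	/* NULL when phys == 0 */
	}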
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 1f46046080f5..b8d69cfbb58b 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,16 +15,8 @@ extern void khugepaged_enter_vma(struct vm_area_struct *vma,
unsigned long vm_flags);
extern void khugepaged_min_free_kbytes_update(void);
extern bool current_is_khugepaged(void);
-#ifdef CONFIG_SHMEM
extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
bool install_pmd);
-#else
-static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
- unsigned long addr, bool install_pmd)
-{
- return 0;
-}
-#endif
static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
diff --git a/include/linux/list.h b/include/linux/list.h
index 29a375889fb8..e7e28afd28f8 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -50,9 +50,9 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
* Performs the full set of list corruption checks before __list_add().
* On list corruption reports a warning, and returns false.
*/
-extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
- struct list_head *prev,
- struct list_head *next);
+bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next);
/*
* Performs list corruption checks before __list_add(). Returns false if a
@@ -93,7 +93,7 @@ static __always_inline bool __list_add_valid(struct list_head *new,
* Performs the full set of list corruption checks before __list_del_entry().
* On list corruption reports a warning, and returns false.
*/
-extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);
+bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);
/*
* Performs list corruption checks before __list_del_entry(). Returns false if a
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 2c982ff7475a..27b17f64bcee 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -223,9 +223,26 @@ static inline struct llist_node *llist_next(struct llist_node *node)
return node->next;
}
-extern bool llist_add_batch(struct llist_node *new_first,
- struct llist_node *new_last,
- struct llist_head *head);
+/**
+ * llist_add_batch - add several linked entries in batch
+ * @new_first: first entry in batch to be added
+ * @new_last: last entry in batch to be added
+ * @head: the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
+ */
+static inline bool llist_add_batch(struct llist_node *new_first,
+ struct llist_node *new_last,
+ struct llist_head *head)
+{
+ struct llist_node *first = READ_ONCE(head->first);
+
+ do {
+ new_last->next = first;
+ } while (!try_cmpxchg(&head->first, &first, new_first));
+
+ return !first;
+}
static inline bool __llist_add_batch(struct llist_node *new_first,
struct llist_node *new_last,
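
Usage sketch: splice a privately built chain onto a shared lock-less list with a single cmpxchg loop (wake_up_consumer() is hypothetical):

	LLIST_HEAD(head);			/* shared lock-less list */
	struct llist_node a, b, c;

	a.next = &b;
	b.next = &c;				/* private chain a -> b -> c */
	if (llist_add_batch(&a, &c, &head))
		wake_up_consumer();		/* list was empty before the add */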
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index cbbcd18d4186..9ef129038224 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -463,6 +463,8 @@ struct ma_wr_state {
void __rcu **slots; /* mas->node->slots pointer */
void *entry; /* The entry to write */
void *content; /* The existing entry that is being overwritten */
+ unsigned char vacant_height; /* Height of lowest node with free space */
+ unsigned char sufficient_height; /* Height of lowest node with min sufficiency + 1 nodes */
};
#define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock))
@@ -498,6 +500,8 @@ struct ma_wr_state {
.mas = ma_state, \
.content = NULL, \
.entry = wr_entry, \
+ .vacant_height = 0, \
+ .sufficient_height = 0 \
}
#define MA_TOPIARY(name, tree) \
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index ef5a1ecc6e59..bb19a2534224 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -42,6 +42,14 @@ extern unsigned long long max_possible_pfn;
* kernel resource tree.
* @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are
* not initialized (only for reserved regions).
+ * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use,
+ * either explicitly with memblock_reserve_kern() or via memblock
+ * allocation APIs. All memblock allocations set this flag.
+ * @MEMBLOCK_KHO_SCRATCH: memory region that kexec can pass to the next
+ * kernel in handover mode. During early boot, we do not know about all
+ * memory reservations yet, so we get scratch memory from the previous
+ * kernel that we know is good to use. It is the only memory that
+ * allocations may happen from in this phase.
*/
enum memblock_flags {
MEMBLOCK_NONE = 0x0, /* No special request */
@@ -50,6 +58,8 @@ enum memblock_flags {
MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */
MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */
MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */
+ MEMBLOCK_RSRV_KERN = 0x20, /* memory reserved for kernel use */
+ MEMBLOCK_KHO_SCRATCH = 0x40, /* scratch memory for kexec handover */
};
/**
@@ -116,7 +126,19 @@ int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid,
int memblock_add(phys_addr_t base, phys_addr_t size);
int memblock_remove(phys_addr_t base, phys_addr_t size);
int memblock_phys_free(phys_addr_t base, phys_addr_t size);
-int memblock_reserve(phys_addr_t base, phys_addr_t size);
+int __memblock_reserve(phys_addr_t base, phys_addr_t size, int nid,
+ enum memblock_flags flags);
+
+static __always_inline int memblock_reserve(phys_addr_t base, phys_addr_t size)
+{
+ return __memblock_reserve(base, size, NUMA_NO_NODE, 0);
+}
+
+static __always_inline int memblock_reserve_kern(phys_addr_t base, phys_addr_t size)
+{
+ return __memblock_reserve(base, size, NUMA_NO_NODE, MEMBLOCK_RSRV_KERN);
+}
+
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
int memblock_physmem_add(phys_addr_t base, phys_addr_t size);
#endif
@@ -132,6 +154,8 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size);
+int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size);
+int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size);
void memblock_free(void *ptr, size_t size);
void reset_all_zones_managed_pages(void);
@@ -275,6 +299,11 @@ static inline bool memblock_is_driver_managed(struct memblock_region *m)
return m->flags & MEMBLOCK_DRIVER_MANAGED;
}
+static inline bool memblock_is_kho_scratch(struct memblock_region *m)
+{
+ return m->flags & MEMBLOCK_KHO_SCRATCH;
+}
+
int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
unsigned long *end_pfn);
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
@@ -476,6 +505,7 @@ static inline __init_memblock bool memblock_bottom_up(void)
phys_addr_t memblock_phys_mem_size(void);
phys_addr_t memblock_reserved_size(void);
+phys_addr_t memblock_reserved_kern_size(phys_addr_t limit, int nid);
unsigned long memblock_estimated_nr_free_pages(void);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
@@ -602,5 +632,14 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) { }
static inline void memtest_report_meminfo(struct seq_file *m) { }
#endif
+#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
+void memblock_set_kho_scratch_only(void);
+void memblock_clear_kho_scratch_only(void);
+void memmap_init_kho_scratch_pages(void);
+#else
+static inline void memblock_set_kho_scratch_only(void) { }
+static inline void memblock_clear_kho_scratch_only(void) { }
+static inline void memmap_init_kho_scratch_pages(void) {}
+#endif
#endif /* _LINUX_MEMBLOCK_H */
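
A minimal sketch of the distinction (addresses and sizes are illustrative): only the tagged reservation is accounted by memblock_reserved_kern_size():

	/* Firmware-dictated hole: reserved, but not kernel memory. */
	memblock_reserve(0x80000000, SZ_64K);

	/* Early kernel buffer: reserved and accounted as kernel use. */
	memblock_reserve_kern(0x81000000, SZ_1M);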
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 53364526d877..f7848f73f41c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -903,19 +903,9 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
struct mem_cgroup *oom_domain);
void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
-void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
- int val);
-
/* idx can be of type enum memcg_stat_item or node_stat_item */
-static inline void mod_memcg_state(struct mem_cgroup *memcg,
- enum memcg_stat_item idx, int val)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __mod_memcg_state(memcg, idx, val);
- local_irq_restore(flags);
-}
+void mod_memcg_state(struct mem_cgroup *memcg,
+ enum memcg_stat_item idx, int val);
static inline void mod_memcg_page_state(struct page *page,
enum memcg_stat_item idx, int val)
@@ -952,19 +942,8 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
local_irq_restore(flags);
}
-void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
- unsigned long count);
-
-static inline void count_memcg_events(struct mem_cgroup *memcg,
- enum vm_event_item idx,
- unsigned long count)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __count_memcg_events(memcg, idx, count);
- local_irq_restore(flags);
-}
+void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
+ unsigned long count);
static inline void count_memcg_folio_events(struct folio *folio,
enum vm_event_item idx, unsigned long nr)
@@ -1057,6 +1036,7 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
return id;
}
+extern int mem_cgroup_init(void);
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
@@ -1374,12 +1354,6 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
{
}
-static inline void __mod_memcg_state(struct mem_cgroup *memcg,
- enum memcg_stat_item idx,
- int nr)
-{
-}
-
static inline void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx,
int nr)
@@ -1433,12 +1407,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
}
static inline void count_memcg_events(struct mem_cgroup *memcg,
- enum vm_event_item idx,
- unsigned long count)
-{
-}
-
-static inline void __count_memcg_events(struct mem_cgroup *memcg,
enum vm_event_item idx,
unsigned long count)
{
@@ -1472,6 +1440,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
{
return 0;
}
+
+static inline int mem_cgroup_init(void) { return 0; }
#endif /* CONFIG_MEMCG */
/*
@@ -1736,6 +1706,8 @@ static inline void count_objcg_events(struct obj_cgroup *objcg,
rcu_read_unlock();
}
+bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid);
+
#else
static inline bool mem_cgroup_kmem_disabled(void)
{
@@ -1793,6 +1765,15 @@ static inline void count_objcg_events(struct obj_cgroup *objcg,
{
}
+static inline ino_t page_cgroup_ino(struct page *page)
+{
+ return 0;
+}
+
+static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid)
+{
+ return true;
+}
#endif /* CONFIG_MEMCG */
#if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP)
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 12daa6ec7d09..5ec4e6d209b9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -149,6 +149,14 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
{
return 0;
}
+static inline int memory_block_advise_max_size(unsigned long size)
+{
+ return -ENODEV;
+}
+static inline unsigned long memory_block_advised_max_size(void)
+{
+ return 0;
+}
#else /* CONFIG_MEMORY_HOTPLUG */
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
@@ -181,6 +189,8 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
void memory_block_add_nid(struct memory_block *mem, int nid,
enum meminit_context context);
#endif /* CONFIG_NUMA */
+int memory_block_advise_max_size(unsigned long size);
+unsigned long memory_block_advised_max_size(void);
#endif /* CONFIG_MEMORY_HOTPLUG */
/*
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index ce9885e0178a..0fe96f3ab3ef 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
+#include <linux/node.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <uapi/linux/mempolicy.h>
@@ -178,6 +179,9 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol)
extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);
+extern int mempolicy_set_node_perf(unsigned int node,
+ struct access_coordinate *coords);
+
#else
struct mempolicy {};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e51dba8398f7..9e221ffcb868 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -12,6 +12,7 @@
#include <linux/rbtree.h>
#include <linux/atomic.h>
#include <linux/debug_locks.h>
+#include <linux/compiler.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>
#include <linux/range.h>
@@ -356,9 +357,7 @@ extern unsigned int kobjsize(const void *objp);
# define VM_SHADOW_STACK VM_NONE
#endif
-#if defined(CONFIG_X86)
-# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
-#elif defined(CONFIG_PPC64)
+#if defined(CONFIG_PPC64)
# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */
#elif defined(CONFIG_PARISC)
# define VM_GROWSUP VM_ARCH_1
@@ -670,204 +669,11 @@ static inline void vma_numab_state_init(struct vm_area_struct *vma) {}
static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
#endif /* CONFIG_NUMA_BALANCING */
-#ifdef CONFIG_PER_VMA_LOCK
-static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
-{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- static struct lock_class_key lockdep_key;
-
- lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
-#endif
- if (reset_refcnt)
- refcount_set(&vma->vm_refcnt, 0);
- vma->vm_lock_seq = UINT_MAX;
-}
-
-static inline bool is_vma_writer_only(int refcnt)
-{
- /*
- * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
- * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
- * a detached vma happens only in vma_mark_detached() and is a rare
- * case, therefore most of the time there will be no unnecessary wakeup.
- */
- return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1;
-}
-
-static inline void vma_refcount_put(struct vm_area_struct *vma)
-{
- /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
- struct mm_struct *mm = vma->vm_mm;
- int oldcnt;
-
- rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
- if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
-
- if (is_vma_writer_only(oldcnt - 1))
- rcuwait_wake_up(&mm->vma_writer_wait);
- }
-}
-
-/*
- * Try to read-lock a vma. The function is allowed to occasionally yield false
- * locked result to avoid performance overhead, in which case we fall back to
- * using mmap_lock. The function should never yield false unlocked result.
- * False locked result is possible if mm_lock_seq overflows or if vma gets
- * reused and attached to a different mm before we lock it.
- * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got
- * detached.
- */
-static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
- int oldcnt;
-
- /*
- * Check before locking. A race might cause false locked result.
- * We can use READ_ONCE() for the mm_lock_seq here, and don't need
- * ACQUIRE semantics, because this is just a lockless check whose result
- * we don't rely on for anything - the mm_lock_seq read against which we
- * need ordering is below.
- */
- if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
- return NULL;
-
- /*
- * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
- * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
- * Acquire fence is required here to avoid reordering against later
- * vm_lock_seq check and checks inside lock_vma_under_rcu().
- */
- if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
- VMA_REF_LIMIT))) {
- /* return EAGAIN if vma got detached from under us */
- return oldcnt ? NULL : ERR_PTR(-EAGAIN);
- }
-
- rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
- /*
- * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
- * False unlocked result is impossible because we modify and check
- * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
- * modification invalidates all existing locks.
- *
- * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
- * racing with vma_end_write_all(), we only start reading from the VMA
- * after it has been unlocked.
- * This pairs with RELEASE semantics in vma_end_write_all().
- */
- if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
- vma_refcount_put(vma);
- return NULL;
- }
-
- return vma;
-}
-
/*
- * Use only while holding mmap read lock which guarantees that locking will not
- * fail (nobody can concurrently write-lock the vma). vma_start_read() should
- * not be used in such cases because it might fail due to mm_lock_seq overflow.
- * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ * These must be here rather than in mmap_lock.h because they depend on the
+ * vm_fault type, which is declared in this header.
*/
-static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
-{
- int oldcnt;
-
- mmap_assert_locked(vma->vm_mm);
- if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
- VMA_REF_LIMIT)))
- return false;
-
- rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
- return true;
-}
-
-/*
- * Use only while holding mmap read lock which guarantees that locking will not
- * fail (nobody can concurrently write-lock the vma). vma_start_read() should
- * not be used in such cases because it might fail due to mm_lock_seq overflow.
- * This functionality is used to obtain vma read lock and drop the mmap read lock.
- */
-static inline bool vma_start_read_locked(struct vm_area_struct *vma)
-{
- return vma_start_read_locked_nested(vma, 0);
-}
-
-static inline void vma_end_read(struct vm_area_struct *vma)
-{
- vma_refcount_put(vma);
-}
-
-/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
-static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
-{
- mmap_assert_write_locked(vma->vm_mm);
-
- /*
- * current task is holding mmap_write_lock, both vma->vm_lock_seq and
- * mm->mm_lock_seq can't be concurrently modified.
- */
- *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
- return (vma->vm_lock_seq == *mm_lock_seq);
-}
-
-void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);
-
-/*
- * Begin writing to a VMA.
- * Exclude concurrent readers under the per-VMA lock until the currently
- * write-locked mmap_lock is dropped or downgraded.
- */
-static inline void vma_start_write(struct vm_area_struct *vma)
-{
- unsigned int mm_lock_seq;
-
- if (__is_vma_write_locked(vma, &mm_lock_seq))
- return;
-
- __vma_start_write(vma, mm_lock_seq);
-}
-
-static inline void vma_assert_write_locked(struct vm_area_struct *vma)
-{
- unsigned int mm_lock_seq;
-
- VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
-}
-
-static inline void vma_assert_locked(struct vm_area_struct *vma)
-{
- unsigned int mm_lock_seq;
-
- VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
- !__is_vma_write_locked(vma, &mm_lock_seq), vma);
-}
-
-/*
- * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
- * assertions should be made either under mmap_write_lock or when the object
- * has been isolated under mmap_write_lock, ensuring no competing writers.
- */
-static inline void vma_assert_attached(struct vm_area_struct *vma)
-{
- WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
-}
-
-static inline void vma_assert_detached(struct vm_area_struct *vma)
-{
- WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
-}
-
-static inline void vma_mark_attached(struct vm_area_struct *vma)
-{
- vma_assert_write_locked(vma);
- vma_assert_detached(vma);
- refcount_set_release(&vma->vm_refcnt, 1);
-}
-
-void vma_mark_detached(struct vm_area_struct *vma);
-
+#ifdef CONFIG_PER_VMA_LOCK
static inline void release_fault_lock(struct vm_fault *vmf)
{
if (vmf->flags & FAULT_FLAG_VMA_LOCK)
@@ -883,36 +689,7 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
else
mmap_assert_locked(vmf->vma->vm_mm);
}
-
-struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
- unsigned long address);
-
-#else /* CONFIG_PER_VMA_LOCK */
-
-static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
-static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
- struct vm_area_struct *vma)
- { return NULL; }
-static inline void vma_end_read(struct vm_area_struct *vma) {}
-static inline void vma_start_write(struct vm_area_struct *vma) {}
-static inline void vma_assert_write_locked(struct vm_area_struct *vma)
- { mmap_assert_write_locked(vma->vm_mm); }
-static inline void vma_assert_attached(struct vm_area_struct *vma) {}
-static inline void vma_assert_detached(struct vm_area_struct *vma) {}
-static inline void vma_mark_attached(struct vm_area_struct *vma) {}
-static inline void vma_mark_detached(struct vm_area_struct *vma) {}
-
-static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
- unsigned long address)
-{
- return NULL;
-}
-
-static inline void vma_assert_locked(struct vm_area_struct *vma)
-{
- mmap_assert_locked(vma->vm_mm);
-}
-
+#else
static inline void release_fault_lock(struct vm_fault *vmf)
{
mmap_read_unlock(vmf->vma->vm_mm);
@@ -922,7 +699,6 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
{
mmap_assert_locked(vmf->vma->vm_mm);
}
-
#endif /* CONFIG_PER_VMA_LOCK */
extern const struct vm_operations_struct vma_dummy_vm_ops;
@@ -1459,7 +1235,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
return pte;
}
-vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page);
+vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *page);
void set_pte_range(struct vm_fault *vmf, struct folio *folio,
struct page *page, unsigned int nr, unsigned long addr);
@@ -2004,6 +1780,45 @@ static inline struct folio *pfn_folio(unsigned long pfn)
return page_folio(pfn_to_page(pfn));
}
+#ifdef CONFIG_MMU
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+ return pfn_pte(page_to_pfn(page), pgprot);
+}
+
+/**
+ * folio_mk_pte - Create a PTE for this folio
+ * @folio: The folio to create a PTE for
+ * @pgprot: The page protection bits to use
+ *
+ * Create a page table entry for the first page of this folio.
+ * This is suitable for passing to set_ptes().
+ *
+ * Return: A page table entry suitable for mapping this folio.
+ */
+static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot)
+{
+ return pfn_pte(folio_pfn(folio), pgprot);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/**
+ * folio_mk_pmd - Create a PMD for this folio
+ * @folio: The folio to create a PMD for
+ * @pgprot: The page protection bits to use
+ *
+ * Create a page table entry for the first page of this folio.
+ * This is suitable for passing to set_pmd_at().
+ *
+ * Return: A page table entry suitable for mapping this folio.
+ */
+static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot)
+{
+ return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot));
+}
+#endif
+#endif /* CONFIG_MMU */
+
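A usage sketch (not part of the change itself): a fault path that has allocated a folio for a VMA could build its PTE from the new helper; folio, vma, addr and ptep are assumed to be prepared by the caller, with the page table lock held.

    pte_t entry = folio_mk_pte(folio, vma->vm_page_prot);

    if (vma->vm_flags & VM_WRITE)
        entry = pte_mkwrite(pte_mkdirty(entry), vma);
    set_ptes(vma->vm_mm, addr, ptep, entry, 1);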
static inline bool folio_has_pincount(const struct folio *folio)
{
if (IS_ENABLED(CONFIG_64BIT))
@@ -2185,15 +2000,6 @@ static inline long compound_nr(struct page *page)
}
/**
- * thp_nr_pages - The number of regular pages in this huge page.
- * @page: The head page of a huge page.
- */
-static inline long thp_nr_pages(struct page *page)
-{
- return folio_nr_pages((struct folio *)page);
-}
-
-/**
* folio_next - Move to the next physical folio.
* @folio: The folio we're currently operating on.
*
@@ -2303,7 +2109,62 @@ static inline bool folio_maybe_mapped_shared(struct folio *folio)
*/
if (mapcount <= 1)
return false;
- return folio_test_large_maybe_mapped_shared(folio);
+ return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids);
+}
+
+/**
+ * folio_expected_ref_count - calculate the expected folio refcount
+ * @folio: the folio
+ *
+ * Calculate the expected folio refcount, taking references from the pagecache,
+ * swapcache, PG_private and page table mappings into account. Useful in
+ * combination with folio_ref_count() to detect unexpected references (e.g.,
+ * GUP or other temporary references).
+ *
+ * This currently does not consider references from the LRU cache. If the folio
+ * was isolated from the LRU (which is the case during migration or split),
+ * the LRU cache does not apply.
+ *
+ * Calling this function on an unmapped folio -- !folio_mapped() -- that is
+ * locked will return a stable result.
+ *
+ * Calling this function on a mapped folio will not yield a stable result,
+ * because nothing stops additional page table mappings from coming (e.g.,
+ * fork()) or going (e.g., munmap()).
+ *
+ * Calling this function without the folio lock will likewise not yield a
+ * stable result: for example, the folio might get dropped from the swapcache
+ * concurrently.
+ *
+ * However, even when called without the folio lock or on a mapped folio,
+ * this function can be used to detect unexpected references early (for example,
+ * if it makes sense to even lock the folio and unmap it).
+ *
+ * The caller must add any reference (e.g., from folio_try_get()) it might be
+ * holding itself to the result.
+ *
+ * Returns the expected folio refcount.
+ */
+static inline int folio_expected_ref_count(const struct folio *folio)
+{
+ const int order = folio_order(folio);
+ int ref_count = 0;
+
+ if (WARN_ON_ONCE(folio_test_slab(folio)))
+ return 0;
+
+ if (folio_test_anon(folio)) {
+ /* One reference per page from the swapcache. */
+ ref_count += folio_test_swapcache(folio) << order;
+ } else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) {
+ /* One reference per page from the pagecache. */
+ ref_count += !!folio->mapping << order;
+ /* One reference from PG_private. */
+ ref_count += folio_test_private(folio);
+ }
+
+ /* One reference per page table mapping. */
+ return ref_count + folio_mapcount(folio);
}
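A usage sketch: a caller holding its own reference (e.g. from folio_try_get()) can detect unexpected references such as GUP pins before trying to unmap or migrate the folio.

    /* +1 accounts for the reference we hold ourselves. */
    if (folio_ref_count(folio) != folio_expected_ref_count(folio) + 1)
        return -EAGAIN; /* unexpected extra reference */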
#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
@@ -2406,7 +2267,6 @@ static inline void clear_page_pfmemalloc(struct page *page)
extern void pagefault_out_of_memory(void);
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
-#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1))
#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))
/*
@@ -2767,7 +2627,7 @@ static inline void update_hiwater_rss(struct mm_struct *mm)
{
unsigned long _rss = get_mm_rss(mm);
- if ((mm)->hiwater_rss < _rss)
+ if (data_race(mm->hiwater_rss) < _rss)
(mm)->hiwater_rss = _rss;
}
@@ -3117,9 +2977,10 @@ static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
pagetable_free(ptdesc);
}
-static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
+static inline bool pagetable_pte_ctor(struct mm_struct *mm,
+ struct ptdesc *ptdesc)
{
- if (!ptlock_init(ptdesc))
+ if (mm != &init_mm && !ptlock_init(ptdesc))
return false;
__pagetable_ctor(ptdesc);
return true;
@@ -3223,9 +3084,10 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
return ptl;
}
-static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc)
+static inline bool pagetable_pmd_ctor(struct mm_struct *mm,
+ struct ptdesc *ptdesc)
{
- if (!pmd_ptlock_init(ptdesc))
+ if (mm != &init_mm && !pmd_ptlock_init(ptdesc))
return false;
ptdesc_pmd_pts_init(ptdesc);
__pagetable_ctor(ptdesc);
@@ -3414,7 +3276,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void exit_mmap(struct mm_struct *);
-int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift);
bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, bool write);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index f9157a0c42a5..89b518ff097e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -447,6 +447,8 @@ static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
#endif /* CONFIG_ANON_VMA_NAME */
+void pfnmap_track_ctx_release(struct kref *ref);
+
static inline void init_tlb_flush_pending(struct mm_struct *mm)
{
atomic_set(&mm->tlb_flush_pending, 0);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 32ba5126e221..d3cffd8828c9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -28,7 +28,6 @@
#endif
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
-#define INIT_PASID 0
struct address_space;
struct futex_private_hash;
@@ -765,6 +764,38 @@ struct vma_numab_state {
int prev_scan_seq;
};
+#ifdef __HAVE_PFNMAP_TRACKING
+struct pfnmap_track_ctx {
+ struct kref kref;
+ unsigned long pfn;
+ unsigned long size; /* in bytes */
+};
+#endif
+
+/*
+ * Describes a VMA that is about to be mmap()'ed. Drivers may choose to
+ * manipulate mutable fields which will cause those fields to be updated in the
+ * resultant VMA.
+ *
+ * Helper functions are not required for manipulating any field.
+ */
+struct vm_area_desc {
+ /* Immutable state. */
+ struct mm_struct *mm;
+ unsigned long start;
+ unsigned long end;
+
+ /* Mutable fields. Populated with initial state. */
+ pgoff_t pgoff;
+ struct file *file;
+ vm_flags_t vm_flags;
+ pgprot_t page_prot;
+
+ /* Write-only fields. */
+ const struct vm_operations_struct *vm_ops;
+ void *private_data;
+};
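As a sketch of how a driver might consume this descriptor: a hypothetical hook (the name, return convention, and my_vm_ops/my_ctx are illustrative assumptions; only the struct above is defined here) could fill it in like so.

    static int my_mmap_prepare(struct vm_area_desc *desc)
    {
        /* Mutable field: adjust flags before the VMA is created. */
        desc->vm_flags |= VM_DONTEXPAND;

        /* Write-only fields: carried into the resulting VMA. */
        desc->vm_ops = &my_vm_ops;   /* placeholder ops table */
        desc->private_data = my_ctx; /* placeholder context */
        return 0;
    }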
+
/*
* This struct describes a virtual memory area. There is one of these
* per VM-area/task. A VM area is any part of the process virtual memory
@@ -878,6 +909,9 @@ struct vm_area_struct {
struct anon_vma_name *anon_name;
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+#ifdef __HAVE_PFNMAP_TRACKING
+ struct pfnmap_track_ctx *pfnmap_track_ctx;
+#endif
} __randomize_layout;
#ifdef CONFIG_NUMA
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index e0eddfd306ef..5da384bd0a26 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -1,6 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H
+/* Avoid a dependency loop by declaring it here. */
+extern int rcuwait_wake_up(struct rcuwait *w);
+
#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
@@ -105,6 +109,206 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int
return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ static struct lock_class_key lockdep_key;
+
+ lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0);
+#endif
+ if (reset_refcnt)
+ refcount_set(&vma->vm_refcnt, 0);
+ vma->vm_lock_seq = UINT_MAX;
+}
+
+static inline bool is_vma_writer_only(int refcnt)
+{
+ /*
+ * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma
+ * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on
+ * a detached vma happens only in vma_mark_detached() and is a rare
+ * case, therefore most of the time there will be no unnecessary wakeup.
+ */
+ return (refcnt & VMA_LOCK_OFFSET) && refcnt <= VMA_LOCK_OFFSET + 1;
+}
+
+static inline void vma_refcount_put(struct vm_area_struct *vma)
+{
+ /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */
+ struct mm_struct *mm = vma->vm_mm;
+ int oldcnt;
+
+ rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
+ if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) {
+ if (is_vma_writer_only(oldcnt - 1))
+ rcuwait_wake_up(&mm->vma_writer_wait);
+ }
+}
+
+/*
+ * Try to read-lock a vma. The function is allowed to occasionally yield a false
+ * locked result to avoid performance overhead, in which case we fall back to
+ * using mmap_lock. The function should never yield a false unlocked result.
+ * A false locked result is possible if mm_lock_seq overflows or if the vma gets
+ * reused and attached to a different mm before we lock it.
+ * Returns the vma on success, NULL on failure to lock, and ERR_PTR(-EAGAIN) if
+ * the vma got detached.
+ */
+static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+{
+ int oldcnt;
+
+ /*
+ * Check before locking. A race might cause false locked result.
+ * We can use READ_ONCE() for the mm_lock_seq here, and don't need
+ * ACQUIRE semantics, because this is just a lockless check whose result
+ * we don't rely on for anything - the mm_lock_seq read against which we
+ * need ordering is below.
+ */
+ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence))
+ return NULL;
+
+ /*
+ * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire()
+ * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET.
+ * Acquire fence is required here to avoid reordering against later
+ * vm_lock_seq check and checks inside lock_vma_under_rcu().
+ */
+ if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
+ VMA_REF_LIMIT))) {
+ /* return EAGAIN if vma got detached from under us */
+ return oldcnt ? NULL : ERR_PTR(-EAGAIN);
+ }
+
+ rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
+ /*
+ * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result.
+ * False unlocked result is impossible because we modify and check
+ * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq
+ * modification invalidates all existing locks.
+ *
+ * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
+ * racing with vma_end_write_all(), we only start reading from the VMA
+ * after it has been unlocked.
+ * This pairs with RELEASE semantics in vma_end_write_all().
+ */
+ if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) {
+ vma_refcount_put(vma);
+ return NULL;
+ }
+
+ return vma;
+}
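The intended calling pattern can be sketched as follows; handle_with_mmap_lock() is a stand-in for the caller's slow path, not a real function.

    rcu_read_lock();
    vma = vma_start_read(mm, vma);
    rcu_read_unlock();

    if (IS_ERR_OR_NULL(vma)) {
        /* NULL: lost a race; ERR_PTR(-EAGAIN): vma was detached. */
        mmap_read_lock(mm);
        return handle_with_mmap_lock(mm, address);
    }
    /* ... fast path under the per-VMA read lock ... */
    vma_end_read(vma);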
+
+/*
+ * Use only while holding the mmap read lock, which guarantees that locking will
+ * not fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This is used to obtain a vma read lock and then drop the mmap read lock.
+ */
+static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
+{
+ int oldcnt;
+
+ mmap_assert_locked(vma->vm_mm);
+ if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
+ VMA_REF_LIMIT)))
+ return false;
+
+ rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_);
+ return true;
+}
+
+/*
+ * Use only while holding mmap read lock which guarantees that locking will not
+ * fail (nobody can concurrently write-lock the vma). vma_start_read() should
+ * not be used in such cases because it might fail due to mm_lock_seq overflow.
+ * This functionality is used to obtain vma read lock and drop the mmap read lock.
+ */
+static inline bool vma_start_read_locked(struct vm_area_struct *vma)
+{
+ return vma_start_read_locked_nested(vma, 0);
+}
+
+static inline void vma_end_read(struct vm_area_struct *vma)
+{
+ vma_refcount_put(vma);
+}
+
+/* WARNING! Can only be used if mmap_lock is expected to be write-locked */
+static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
+{
+ mmap_assert_write_locked(vma->vm_mm);
+
+ /*
+ * The current task holds mmap_write_lock, so neither vma->vm_lock_seq
+ * nor mm->mm_lock_seq can be concurrently modified.
+ */
+ *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
+ return (vma->vm_lock_seq == *mm_lock_seq);
+}
+
+void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);
+
+/*
+ * Begin writing to a VMA.
+ * Exclude concurrent readers under the per-VMA lock until the currently
+ * write-locked mmap_lock is dropped or downgraded.
+ */
+static inline void vma_start_write(struct vm_area_struct *vma)
+{
+ unsigned int mm_lock_seq;
+
+ if (__is_vma_write_locked(vma, &mm_lock_seq))
+ return;
+
+ __vma_start_write(vma, mm_lock_seq);
+}
+
+static inline void vma_assert_write_locked(struct vm_area_struct *vma)
+{
+ unsigned int mm_lock_seq;
+
+ VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
+}
+
+static inline void vma_assert_locked(struct vm_area_struct *vma)
+{
+ unsigned int mm_lock_seq;
+
+ VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 &&
+ !__is_vma_write_locked(vma, &mm_lock_seq), vma);
+}
+
+/*
+ * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
+ * assertions should be made either under mmap_write_lock or when the object
+ * has been isolated under mmap_write_lock, ensuring no competing writers.
+ */
+static inline void vma_assert_attached(struct vm_area_struct *vma)
+{
+ WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
+}
+
+static inline void vma_assert_detached(struct vm_area_struct *vma)
+{
+ WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
+}
+
+static inline void vma_mark_attached(struct vm_area_struct *vma)
+{
+ vma_assert_write_locked(vma);
+ vma_assert_detached(vma);
+ refcount_set_release(&vma->vm_refcnt, 1);
+}
+
+void vma_mark_detached(struct vm_area_struct *vma);
+
+struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
+ unsigned long address);
+
#else /* CONFIG_PER_VMA_LOCK */
static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
@@ -120,6 +324,29 @@ static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int
{
return true;
}
+static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
+static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
+ struct vm_area_struct *vma)
+ { return NULL; }
+static inline void vma_end_read(struct vm_area_struct *vma) {}
+static inline void vma_start_write(struct vm_area_struct *vma) {}
+static inline void vma_assert_write_locked(struct vm_area_struct *vma)
+ { mmap_assert_write_locked(vma->vm_mm); }
+static inline void vma_assert_attached(struct vm_area_struct *vma) {}
+static inline void vma_assert_detached(struct vm_area_struct *vma) {}
+static inline void vma_mark_attached(struct vm_area_struct *vma) {}
+static inline void vma_mark_detached(struct vm_area_struct *vma) {}
+
+static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
+ unsigned long address)
+{
+ return NULL;
+}
+
+static inline void vma_assert_locked(struct vm_area_struct *vma)
+{
+ mmap_assert_locked(vma->vm_mm);
+}
#endif /* CONFIG_PER_VMA_LOCK */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b1c459f7a485..28066b4ced81 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2074,11 +2074,37 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
return usage ? test_bit(idx, usage->subsection_map) : 0;
}
+
+static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn)
+{
+ struct mem_section_usage *usage = READ_ONCE(ms->usage);
+ int idx = subsection_map_index(*pfn);
+ unsigned long bit;
+
+ if (!usage)
+ return false;
+
+ if (test_bit(idx, usage->subsection_map))
+ return true;
+
+ /* Find the next subsection that exists */
+ bit = find_next_bit(usage->subsection_map, SUBSECTIONS_PER_SECTION, idx);
+ if (bit == SUBSECTIONS_PER_SECTION)
+ return false;
+
+ *pfn = (*pfn & PAGE_SECTION_MASK) + (bit * PAGES_PER_SUBSECTION);
+ return true;
+}
#else
static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
{
return 1;
}
+
+static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn)
+{
+ return true;
+}
#endif
void sparse_init_early_section(int nid, struct page *map, unsigned long pnum,
@@ -2127,6 +2153,58 @@ static inline int pfn_valid(unsigned long pfn)
return ret;
}
+
+/* Returns end_pfn or higher if no valid PFN remains in the range */
+static inline unsigned long first_valid_pfn(unsigned long pfn, unsigned long end_pfn)
+{
+ unsigned long nr = pfn_to_section_nr(pfn);
+
+ rcu_read_lock_sched();
+
+ while (nr <= __highest_present_section_nr && pfn < end_pfn) {
+ struct mem_section *ms = __pfn_to_section(pfn);
+
+ if (valid_section(ms) &&
+ (early_section(ms) || pfn_section_first_valid(ms, &pfn))) {
+ rcu_read_unlock_sched();
+ return pfn;
+ }
+
+ /* Nothing left in this section? Skip to next section */
+ nr++;
+ pfn = section_nr_to_pfn(nr);
+ }
+
+ rcu_read_unlock_sched();
+ return end_pfn;
+}
+
+static inline unsigned long next_valid_pfn(unsigned long pfn, unsigned long end_pfn)
+{
+ pfn++;
+
+ if (pfn >= end_pfn)
+ return end_pfn;
+
+ /*
+ * Either every PFN within the section (or subsection for VMEMMAP) is
+ * valid, or none of them are. So there's no point repeating the check
+ * for every PFN; only call first_valid_pfn() again when crossing a
+ * (sub)section boundary (i.e. !(pfn & ~PAGE_{SUB,}SECTION_MASK)).
+ */
+ if (pfn & ~(IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) ?
+ PAGE_SUBSECTION_MASK : PAGE_SECTION_MASK))
+ return pfn;
+
+ return first_valid_pfn(pfn, end_pfn);
+}
+
+#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \
+ for ((_pfn) = first_valid_pfn((_start_pfn), (_end_pfn)); \
+ (_pfn) < (_end_pfn); \
+ (_pfn) = next_valid_pfn((_pfn), (_end_pfn)))
+
#endif
static inline int pfn_in_present_section(unsigned long pfn)
@@ -2176,6 +2254,16 @@ void sparse_init(void);
#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
+/*
+ * Fallback case for when the architecture provides its own pfn_valid() but
+ * not a corresponding for_each_valid_pfn().
+ */
+#ifndef for_each_valid_pfn
+#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \
+ for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++) \
+ if (pfn_valid(_pfn))
+#endif
+
#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */
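A usage sketch for the new iterator (do_something_with() is a placeholder for the caller's per-page work):

    unsigned long pfn;

    for_each_valid_pfn(pfn, start_pfn, end_pfn) {
        /* Only PFNs with a valid memmap entry are visited. */
        do_something_with(pfn_to_page(pfn));
    }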
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 6904ad33ee7a..d3ee0e5162f0 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -101,9 +101,6 @@ extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
int flags, const char *name,
void *data);
-extern struct vfsmount *vfs_submount(const struct dentry *mountpoint,
- struct file_system_type *type,
- const char *name, void *data);
extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list);
extern void mark_mounts_for_expiry(struct list_head *mounts);
diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h
index dd85613cdd86..991076cba7c5 100644
--- a/include/linux/numa_memblks.h
+++ b/include/linux/numa_memblks.h
@@ -22,6 +22,7 @@ struct numa_meminfo {
};
int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+int __init numa_add_reserved_memblk(int nid, u64 start, u64 end);
void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h
index 6f9242259edc..6de479ebbe5d 100644
--- a/include/linux/oid_registry.h
+++ b/include/linux/oid_registry.h
@@ -151,6 +151,5 @@ enum OID {
extern enum OID look_up_OID(const void *data, size_t datasize);
extern int parse_OID(const void *data, size_t datasize, enum OID *oid);
extern int sprint_oid(const void *, size_t, char *, size_t);
-extern int sprint_OID(enum OID, char *, size_t);
#endif /* _LINUX_OID_REGISTRY_H */
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 7b7be27ca113..154ed0dbb43f 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -389,25 +389,38 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
struct_size((type *)NULL, member, count)
/**
- * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
- * Enables caller macro to pass (different) initializer.
+ * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables the caller macro to pass arbitrary trailing expressions.
*
* @type: structure type name, including "struct" keyword.
* @name: Name for a variable to define.
* @member: Name of the array member.
* @count: Number of elements in the array; must be compile-time const.
- * @initializer: Initializer expression (e.g., pass `= { }` at minimum).
+ * @trailer: Trailing expressions for attributes and/or initializers.
*/
-#define _DEFINE_FLEX(type, name, member, count, initializer...) \
+#define __DEFINE_FLEX(type, name, member, count, trailer...) \
_Static_assert(__builtin_constant_p(count), \
"onstack flex array members require compile-time const count"); \
union { \
u8 bytes[struct_size_t(type, member, count)]; \
type obj; \
- } name##_u = { .obj initializer }; \
+ } name##_u trailer; \
type *name = (type *)&name##_u
/**
+ * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family.
+ * Enables the caller macro to pass a (different) initializer.
+ *
+ * @type: structure type name, including "struct" keyword.
+ * @name: Name for a variable to define.
+ * @member: Name of the array member.
+ * @count: Number of elements in the array; must be compile-time const.
+ * @initializer: Initializer expression (e.g., pass `= { }` at minimum).
+ */
+#define _DEFINE_FLEX(type, name, member, count, initializer...) \
+ __DEFINE_FLEX(type, name, member, count, = { .obj initializer })
+
+/**
* DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing
* flexible array member, when it does not have a __counted_by annotation.
*
@@ -424,7 +437,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend)
* elements in array @member.
*/
#define DEFINE_RAW_FLEX(type, name, member, count) \
- _DEFINE_FLEX(type, name, member, count, = {})
+ __DEFINE_FLEX(type, name, member, count, = { })
/**
* DEFINE_FLEX() - Define an on-stack instance of structure with a trailing
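Usage is unchanged by this refactor; a sketch with an illustrative struct:

    struct thing {
        int count;
        u32 items[]; /* flexible array member, no __counted_by */
    };

    DEFINE_RAW_FLEX(struct thing, t, items, 3); /* t points at on-stack storage */
    t->items[0] = 1; /* room for 3 elements */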
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 4f5c9e979bb9..760006b1c480 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -72,8 +72,10 @@
#define NODE_NOT_IN_PAGE_FLAGS 1
#endif
-#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
+#if defined(CONFIG_KASAN_SW_TAGS)
#define KASAN_TAG_WIDTH 8
+#elif defined(CONFIG_KASAN_HW_TAGS)
+#define KASAN_TAG_WIDTH 4
#else
#define KASAN_TAG_WIDTH 0
#endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3b814ce08331..4fe5ee67535b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -915,20 +915,6 @@ FOLIO_FLAG_FALSE(partially_mapped)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
- * PageHuge() only returns true for hugetlbfs pages, but not for
- * normal or transparent huge pages.
- *
- * PageTransHuge() returns true for both transparent huge and
- * hugetlbfs pages, but not normal pages. PageTransHuge() can only be
- * called only in the core VM paths where hugetlbfs pages can't exist.
- */
-static inline int PageTransHuge(const struct page *page)
-{
- VM_BUG_ON_PAGE(PageTail(page), page);
- return PageHead(page);
-}
-
-/*
* PageTransCompound returns true for both transparent huge pages
* and hugetlbfs pages, so it should only be called when it's known
* that hugetlbfs pages aren't involved.
@@ -938,7 +924,6 @@ static inline int PageTransCompound(const struct page *page)
return PageCompound(page);
}
#else
-TESTPAGEFLAG_FALSE(TransHuge, transhuge)
TESTPAGEFLAG_FALSE(TransCompound, transcompound)
#endif
@@ -989,7 +974,7 @@ static inline bool page_mapcount_is_type(unsigned int mapcount)
static inline bool page_has_type(const struct page *page)
{
- return page_mapcount_is_type(data_race(page->page_type));
+ return page_type_has_type(data_race(page->page_type));
}
#define FOLIO_TYPE_OPS(lname, fname) \
@@ -1237,10 +1222,6 @@ static inline int folio_has_private(const struct folio *folio)
return !!(folio->flags & PAGE_FLAGS_PRIVATE);
}
-static inline bool folio_test_large_maybe_mapped_shared(const struct folio *folio)
-{
- return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids);
-}
#undef PF_ANY
#undef PF_HEAD
#undef PF_NO_TAIL
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 26baa78f1ca7..d2ced9920992 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -533,7 +533,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
}
struct address_space *folio_mapping(struct folio *);
-struct address_space *swapcache_mapping(struct folio *);
/**
* folio_flush_mapping - Find the file mapping this folio belongs to.
@@ -884,26 +883,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
-extern pgoff_t __folio_swap_cache_index(struct folio *folio);
-
-/**
- * folio_index - File index of a folio.
- * @folio: The folio.
- *
- * For a folio which is either in the page cache or the swap cache,
- * return its index within the address_space it belongs to. If you know
- * the page is definitely in the page cache, you can look at the folio's
- * index directly.
- *
- * Return: The index (offset in units of pages) of a folio in its file.
- */
-static inline pgoff_t folio_index(struct folio *folio)
-{
- if (unlikely(folio_test_swapcache(folio)))
- return __folio_swap_cache_index(folio);
- return folio->index;
-}
-
/**
* folio_next_index - Get the index of the next folio.
* @folio: The current folio.
@@ -935,27 +914,14 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
* @folio: The folio.
* @index: The page index within the file.
*
- * Context: The caller should have the page locked in order to prevent
- * (eg) shmem from moving the page between the page cache and swap cache
- * and changing its index in the middle of the operation.
+ * Context: The caller should have the folio locked and ensure
+ * e.g., shmem did not move this folio to the swap cache.
* Return: true or false.
*/
static inline bool folio_contains(struct folio *folio, pgoff_t index)
{
- return index - folio_index(folio) < folio_nr_pages(folio);
-}
-
-/*
- * Given the page we found in the page cache, return the page corresponding
- * to this index in the file
- */
-static inline struct page *find_subpage(struct page *head, pgoff_t index)
-{
- /* HugeTLBfs wants the head page regardless */
- if (PageHuge(head))
- return head;
-
- return head + (index & (thp_nr_pages(head) - 1));
+ VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio);
+ return index - folio->index < folio_nr_pages(folio);
}
unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
@@ -1308,9 +1274,9 @@ static inline bool filemap_range_needs_writeback(struct address_space *mapping,
* struct readahead_control - Describes a readahead request.
*
* A readahead request is for consecutive pages. Filesystems which
- * implement the ->readahead method should call readahead_page() or
- * readahead_page_batch() in a loop and attempt to start I/O against
- * each page in the request.
+ * implement the ->readahead method should call readahead_folio() or
+ * __readahead_batch() in a loop and attempt to start reads into each
+ * folio in the request.
*
* Most of the fields in this struct are private and should be accessed
* by the functions below.
@@ -1416,22 +1382,6 @@ static inline struct folio *__readahead_folio(struct readahead_control *ractl)
}
/**
- * readahead_page - Get the next page to read.
- * @ractl: The current readahead request.
- *
- * Context: The page is locked and has an elevated refcount. The caller
- * should decreases the refcount once the page has been submitted for I/O
- * and unlock the page once all I/O to that page has completed.
- * Return: A pointer to the next page, or %NULL if we are done.
- */
-static inline struct page *readahead_page(struct readahead_control *ractl)
-{
- struct folio *folio = __readahead_folio(ractl);
-
- return &folio->page;
-}
-
-/**
* readahead_folio - Get the next folio to read.
* @ractl: The current readahead request.
*
@@ -1453,7 +1403,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
{
unsigned int i = 0;
XA_STATE(xas, &rac->mapping->i_pages, 0);
- struct page *page;
+ struct folio *folio;
BUG_ON(rac->_batch_count > rac->_nr_pages);
rac->_nr_pages -= rac->_batch_count;
@@ -1462,13 +1412,12 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
xas_set(&xas, rac->_index);
rcu_read_lock();
- xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
- if (xas_retry(&xas, page))
+ xas_for_each(&xas, folio, rac->_index + rac->_nr_pages - 1) {
+ if (xas_retry(&xas, folio))
continue;
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(PageTail(page), page);
- array[i++] = page;
- rac->_batch_count += thp_nr_pages(page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ array[i++] = folio_page(folio, 0);
+ rac->_batch_count += folio_nr_pages(folio);
if (i == array_sz)
break;
}
@@ -1478,20 +1427,6 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac,
}
/**
- * readahead_page_batch - Get a batch of pages to read.
- * @rac: The current readahead request.
- * @array: An array of pointers to struct page.
- *
- * Context: The pages are locked and have an elevated refcount. The caller
- * should decreases the refcount once the page has been submitted for I/O
- * and unlock the page once all I/O to that page has completed.
- * Return: The number of pages placed in the array. 0 indicates the request
- * is complete.
- */
-#define readahead_page_batch(rac, array) \
- __readahead_batch(rac, array, ARRAY_SIZE(array))
-
-/**
* readahead_pos - The byte offset into the file of this readahead request.
* @rac: The readahead request.
*/
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 0aeb0e276a3e..c16cdeaa505e 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -375,7 +375,7 @@ do { \
} while (0)
/*
- * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@linux.com>
+ * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@gentwo.org>
*
* Optimized manipulation for memory allocated through the per cpu
* allocator or for addresses of per cpu variables.
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index b50447ef1c92..0b6e1f781d86 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1164,10 +1164,6 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
}
#endif
-#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
-#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr)
-#endif
-
#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, old_addr, new_addr) (pte)
#endif
@@ -1489,83 +1485,92 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
* vmf_insert_pfn.
*/
-/*
- * track_pfn_remap is called when a _new_ pfn mapping is being established
- * by remap_pfn_range() for physical range indicated by pfn and size.
- */
-static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long addr,
- unsigned long size)
+static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
+ pgprot_t *prot)
{
return 0;
}
-/*
- * track_pfn_insert is called when a _new_ single pfn is established
- * by vmf_insert_pfn().
- */
-static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- pfn_t pfn)
+static inline int pfnmap_track(unsigned long pfn, unsigned long size,
+ pgprot_t *prot)
{
+ return 0;
}
-/*
- * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
- * tables copied during copy_page_range(). Will store the pfn to be
- * passed to untrack_pfn_copy() only if there is something to be untracked.
- * Callers should initialize the pfn to 0.
- */
-static inline int track_pfn_copy(struct vm_area_struct *dst_vma,
- struct vm_area_struct *src_vma, unsigned long *pfn)
+static inline void pfnmap_untrack(unsigned long pfn, unsigned long size)
{
- return 0;
}
+#else
+/**
+ * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ * @prot: the pgprot to modify
+ *
+ * Look up the cachemode for the pfn range starting at @pfn with the size
+ * @size and store it in @prot, leaving other data in @prot unchanged.
+ *
+ * This allows a hardware implementation to exercise fine-grained control of
+ * memory cache behavior at page-level granularity. Without a hardware
+ * implementation, this function does nothing.
+ *
+ * Currently there is only one implementation for this - x86 Page Attribute
+ * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
+ *
+ * This function can fail if the pfn range spans pfns that require differing
+ * cachemodes. If the pfn range was previously verified to have a single
+ * cachemode, it is sufficient to query only a single pfn. The assumption is
+ * that this is the case for drivers using the vmf_insert_pfn*() interface.
+ *
+ * Returns 0 on success and -EINVAL on error.
+ */
+int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
+ pgprot_t *prot);
-/*
- * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during
- * copy_page_range(), but after track_pfn_copy() was already called. Can
- * be called even if track_pfn_copy() did not actually track anything:
- * handled internally.
+/**
+ * pfnmap_track - track a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ * @prot: the pgprot to track
+ *
+ * Request the pfn range to be 'tracked' by a hardware implementation and
+ * set up the cachemode in @prot similarly to pfnmap_setup_cachemode().
+ *
+ * This allows fine-grained control of memory cache behavior at page-level
+ * granularity. Tracking memory this way persists across VMA splits
+ * (VMA merging does not apply for VM_PFNMAP).
+ *
+ * Currently, there is only one implementation for this - x86 Page Attribute
+ * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
+ *
+ * Returns 0 on success and -EINVAL on error.
*/
-static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma,
- unsigned long pfn)
-{
-}
+int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot);
-/*
- * untrack_pfn is called while unmapping a pfnmap for a region.
- * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case pfn, size are zero).
+/**
+ * pfnmap_untrack - untrack a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ *
+ * Untrack a pfn range previously tracked through pfnmap_track().
*/
-static inline void untrack_pfn(struct vm_area_struct *vma,
- unsigned long pfn, unsigned long size,
- bool mm_wr_locked)
-{
-}
+void pfnmap_untrack(unsigned long pfn, unsigned long size);
+#endif
-/*
- * untrack_pfn_clear is called in the following cases on a VM_PFNMAP VMA:
+/**
+ * pfnmap_setup_cachemode_pfn - setup the cachemode in the pgprot for a pfn
+ * @pfn: the pfn
+ * @prot: the pgprot to modify
+ *
+ * Look up the cachemode for @pfn and store it in @prot, leaving other
+ * data in @prot unchanged.
*
- * 1) During mremap() on the src VMA after the page tables were moved.
- * 2) During fork() on the dst VMA, immediately after duplicating the src VMA.
+ * See pfnmap_setup_cachemode() for details.
*/
-static inline void untrack_pfn_clear(struct vm_area_struct *vma)
+static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
{
+ pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot);
}
-#else
-extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
- unsigned long pfn, unsigned long addr,
- unsigned long size);
-extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- pfn_t pfn);
-extern int track_pfn_copy(struct vm_area_struct *dst_vma,
- struct vm_area_struct *src_vma, unsigned long *pfn);
-extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
- unsigned long pfn);
-extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
- unsigned long size, bool mm_wr_locked);
-extern void untrack_pfn_clear(struct vm_area_struct *vma);
-#endif
#ifdef CONFIG_MMU
#ifdef __HAVE_COLOR_ZERO_PAGE
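A sketch of the expected pairing from a driver's mmap path (error handling compressed; vma, pfn and size are assumed to be set up by the caller):

    pgprot_t prot = vma->vm_page_prot;
    int err;

    err = pfnmap_track(pfn, size, &prot);
    if (err)
        return err; /* e.g. conflicting cachemodes in the range */

    err = remap_pfn_range_notrack(vma, vma->vm_start, pfn, size, prot);
    if (err)
        pfnmap_untrack(pfn, size);
    return err;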
diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
index 8dbd51ea8626..240bd3bff18d 100644
--- a/include/linux/ptdump.h
+++ b/include/linux/ptdump.h
@@ -11,10 +11,17 @@ struct ptdump_range {
};
struct ptdump_state {
- /* level is 0:PGD to 4:PTE, or -1 if unknown */
- void (*note_page)(struct ptdump_state *st, unsigned long addr,
- int level, u64 val);
- void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
+ void (*note_page_pte)(struct ptdump_state *st, unsigned long addr, pte_t pte);
+ void (*note_page_pmd)(struct ptdump_state *st, unsigned long addr, pmd_t pmd);
+ void (*note_page_pud)(struct ptdump_state *st, unsigned long addr, pud_t pud);
+ void (*note_page_p4d)(struct ptdump_state *st, unsigned long addr, p4d_t p4d);
+ void (*note_page_pgd)(struct ptdump_state *st, unsigned long addr, pgd_t pgd);
+ void (*note_page_flush)(struct ptdump_state *st);
+ void (*effective_prot_pte)(struct ptdump_state *st, pte_t pte);
+ void (*effective_prot_pmd)(struct ptdump_state *st, pmd_t pmd);
+ void (*effective_prot_pud)(struct ptdump_state *st, pud_t pud);
+ void (*effective_prot_p4d)(struct ptdump_state *st, p4d_t p4d);
+ void (*effective_prot_pgd)(struct ptdump_state *st, pgd_t pgd);
const struct ptdump_range *range;
};
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 72b876dd5cb8..b3224111d074 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -159,9 +159,6 @@ struct rchan *relay_open(const char *base_filename,
size_t n_subbufs,
const struct rchan_callbacks *cb,
void *private_data);
-extern int relay_late_setup_files(struct rchan *chan,
- const char *base_filename,
- struct dentry *parent);
extern void relay_close(struct rchan *chan);
extern void relay_flush(struct rchan *chan);
extern void relay_subbufs_consumed(struct rchan *chan,
diff --git a/include/linux/reset.h b/include/linux/reset.h
index 2986ced69a02..840d75d172f6 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -1005,6 +1005,12 @@ devm_reset_control_array_get_exclusive(struct device *dev)
}
static inline struct reset_control *
+devm_reset_control_array_get_exclusive_released(struct device *dev)
+{
+ return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE_RELEASED);
+}
+
+static inline struct reset_control *
devm_reset_control_array_get_shared(struct device *dev)
{
return devm_reset_control_array_get(dev, RESET_CONTROL_SHARED);
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index e49c32b0f394..dd8afe511242 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -391,13 +391,8 @@ struct rio_dev *rio_dev_get(struct rio_dev *);
void rio_dev_put(struct rio_dev *);
#ifdef CONFIG_RAPIDIO_DMA_ENGINE
-extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport);
extern void rio_release_dma(struct dma_chan *dchan);
-extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
- struct rio_dev *rdev, struct dma_chan *dchan,
- struct rio_dma_data *data,
- enum dma_transfer_direction direction, unsigned long flags);
extern struct dma_async_tx_descriptor *rio_dma_prep_xfer(
struct dma_chan *dchan, u16 destid,
struct rio_dma_data *data,
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 6b82b618846e..c4f4903b1088 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -223,7 +223,7 @@ static inline void __folio_large_mapcount_sanity_checks(const struct folio *foli
VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY &&
folio->_mm_id_mapcount[1] < 0);
VM_WARN_ON_ONCE(!folio_mapped(folio) &&
- folio_test_large_maybe_mapped_shared(folio));
+ test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids));
}
static __always_inline void folio_set_large_mapcount(struct folio *folio,
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 138e2f1bd08f..0cdbfc42f153 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -95,6 +95,28 @@ static inline bool sg_is_last(struct scatterlist *sg)
}
/**
+ * sg_next - return the next scatterlist entry in a list
+ * @sg: The current sg entry
+ *
+ * Description:
+ * Usually the next entry will be @sg + 1, but if this sg element is part
+ * of a chained scatterlist, it could jump to the start of a new
+ * scatterlist array.
+ *
+ **/
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+ if (sg_is_last(sg))
+ return NULL;
+
+ sg++;
+ if (unlikely(sg_is_chain(sg)))
+ sg = sg_chain_ptr(sg);
+
+ return sg;
+}
+
+/**
* sg_assign_page - Assign a given page to an SG entry
* @sg: SG entry
* @page: The page
@@ -418,7 +440,6 @@ static inline void sg_init_marker(struct scatterlist *sgl,
int sg_nents(struct scatterlist *sg);
int sg_nents_for_len(struct scatterlist *sg, u64 len);
-struct scatterlist *sg_next(struct scatterlist *);
struct scatterlist *sg_last(struct scatterlist *s, unsigned int);
void sg_init_table(struct scatterlist *, unsigned int);
void sg_init_one(struct scatterlist *, const void *, unsigned int);
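A sketch of the walk that sg_next() enables (process() is a placeholder for per-entry work; for_each_sg() wraps this same loop):

    struct scatterlist *sg;

    for (sg = sgl; sg; sg = sg_next(sg)) {
        /* Chain entries are followed transparently. */
        process(sg_page(sg), sg->length);
    }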
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1f054f1f11b5..aa9c5be7a632 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1240,7 +1240,11 @@ struct task_struct {
#endif
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
- struct mutex *blocker_mutex;
+ /*
+ * Encoded lock address causing task block (lower 2 bits = type from
+ * <linux/hung_task.h>). Accessed via hung_task_*() helpers.
+ */
+ unsigned long blocker;
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index cffad65bdc6a..85c5a6392e02 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -106,7 +106,6 @@ static inline unsigned long stack_not_used(struct task_struct *p)
#endif
extern void set_task_stack_end_magic(struct task_struct *tsk);
-#ifndef __HAVE_ARCH_KSTACK_END
static inline int kstack_end(void *addr)
{
/* Reliable end of stack detection:
@@ -114,6 +113,5 @@ static inline int kstack_end(void *addr)
*/
return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
}
-#endif
#endif /* _LINUX_SCHED_TASK_STACK_H */
diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h
index 53b356a26414..27bd372cbfb1 100644
--- a/include/linux/scmi_imx_protocol.h
+++ b/include/linux/scmi_imx_protocol.h
@@ -11,9 +11,12 @@
#include <linux/bitfield.h>
#include <linux/device.h>
#include <linux/notifier.h>
+#include <linux/scmi_protocol.h>
#include <linux/types.h>
+#define SCMI_PROTOCOL_IMX_LMM 0x80
#define SCMI_PROTOCOL_IMX_BBM 0x81
+#define SCMI_PROTOCOL_IMX_CPU 0x82
#define SCMI_PROTOCOL_IMX_MISC 0x84
#define SCMI_IMX_VENDOR "NXP"
@@ -57,4 +60,43 @@ struct scmi_imx_misc_proto_ops {
int (*misc_ctrl_req_notify)(const struct scmi_protocol_handle *ph,
u32 ctrl_id, u32 evt_id, u32 flags);
};
+
+/* See LMM_ATTRIBUTES in imx95.rst */
+#define LMM_ID_DISCOVER 0xFFFFFFFFU
+#define LMM_MAX_NAME 16
+
+enum scmi_imx_lmm_state {
+ LMM_STATE_LM_OFF,
+ LMM_STATE_LM_ON,
+ LMM_STATE_LM_SUSPEND,
+ LMM_STATE_LM_POWERED,
+};
+
+struct scmi_imx_lmm_info {
+ u32 lmid;
+ enum scmi_imx_lmm_state state;
+ u32 errstatus;
+ u8 name[LMM_MAX_NAME];
+};
+
+struct scmi_imx_lmm_proto_ops {
+ int (*lmm_power_boot)(const struct scmi_protocol_handle *ph, u32 lmid,
+ bool boot);
+ int (*lmm_info)(const struct scmi_protocol_handle *ph, u32 lmid,
+ struct scmi_imx_lmm_info *info);
+ int (*lmm_reset_vector_set)(const struct scmi_protocol_handle *ph,
+ u32 lmid, u32 cpuid, u32 flags, u64 vector);
+ int (*lmm_shutdown)(const struct scmi_protocol_handle *ph, u32 lmid,
+ u32 flags);
+};
+
+struct scmi_imx_cpu_proto_ops {
+ int (*cpu_reset_vector_set)(const struct scmi_protocol_handle *ph,
+ u32 cpuid, u64 vector, bool start,
+ bool boot, bool resume);
+ int (*cpu_start)(const struct scmi_protocol_handle *ph, u32 cpuid,
+ bool start);
+ int (*cpu_started)(const struct scmi_protocol_handle *ph, u32 cpuid,
+ bool *started);
+};
#endif
diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h
index 04655faadc2d..89706157e622 100644
--- a/include/linux/semaphore.h
+++ b/include/linux/semaphore.h
@@ -16,13 +16,25 @@ struct semaphore {
raw_spinlock_t lock;
unsigned int count;
struct list_head wait_list;
+
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+ unsigned long last_holder;
+#endif
};
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+#define __LAST_HOLDER_SEMAPHORE_INITIALIZER \
+ , .last_holder = 0UL
+#else
+#define __LAST_HOLDER_SEMAPHORE_INITIALIZER
+#endif
+
#define __SEMAPHORE_INITIALIZER(name, n) \
{ \
.lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \
.count = n, \
- .wait_list = LIST_HEAD_INIT((name).wait_list), \
+ .wait_list = LIST_HEAD_INIT((name).wait_list) \
+ __LAST_HOLDER_SEMAPHORE_INITIALIZER \
}
/*
@@ -47,5 +59,6 @@ extern int __must_check down_killable(struct semaphore *sem);
extern int __must_check down_trylock(struct semaphore *sem);
extern int __must_check down_timeout(struct semaphore *sem, long jiffies);
extern void up(struct semaphore *sem);
+extern unsigned long sem_last_holder(struct semaphore *sem);
#endif /* __LINUX_SEMAPHORE_H */
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 8e5d78fb4847..7a69210a250c 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -24,6 +24,7 @@
#define LLCC_CMPTDMA 15
#define LLCC_DISP 16
#define LLCC_VIDFW 17
+#define LLCC_CAMFW 18
#define LLCC_MDMHPFX 20
#define LLCC_MDMPNG 21
#define LLCC_AUDHW 22
@@ -67,6 +68,13 @@
#define LLCC_EVCS_LEFT 67
#define LLCC_EVCS_RIGHT 68
#define LLCC_SPAD 69
+#define LLCC_VIDDEC 70
+#define LLCC_CAMOFE 71
+#define LLCC_CAMRTIP 72
+#define LLCC_CAMSRTIP 73
+#define LLCC_CAMRTRF 74
+#define LLCC_CAMSRTRF 75
+#define LLCC_CPUSSMPAM 89
/**
* struct llcc_slice_desc - Cache slice descriptor
diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index ce1a3790d6fb..0d5a17ea8fb8 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -658,9 +658,20 @@
#define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8)
/* For Tensor GS101 */
+/* PMU ALIVE */
#define GS101_SYSIP_DAT0 (0x810)
+#define GS101_CPU0_INFORM (0x860)
+#define GS101_CPU_INFORM(cpu) \
+ (GS101_CPU0_INFORM + ((cpu) * 4))
#define GS101_SYSTEM_CONFIGURATION (0x3A00)
#define GS101_PHY_CTRL_USB20 (0x3EB0)
#define GS101_PHY_CTRL_USBDP (0x3EB4)
+/* PMU INTR GEN */
+#define GS101_GRP1_INTR_BID_UPEND (0x0108)
+#define GS101_GRP1_INTR_BID_CLEAR (0x010c)
+#define GS101_GRP2_INTR_BID_ENABLE (0x0200)
+#define GS101_GRP2_INTR_BID_UPEND (0x0208)
+#define GS101_GRP2_INTR_BID_CLEAR (0x020c)
+
#endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */
diff --git a/include/linux/sort.h b/include/linux/sort.h
index 8e5603b10941..c01ef804a0eb 100644
--- a/include/linux/sort.h
+++ b/include/linux/sort.h
@@ -4,6 +4,16 @@
#include <linux/types.h>
+/**
+ * cmp_int - perform a three-way comparison of the arguments
+ * @l: the left argument
+ * @r: the right argument
+ *
+ * Return: 1 if the left argument is greater than the right one; 0 if the
+ * arguments are equal; -1 if the left argument is less than the right one.
+ */
+#define cmp_int(l, r) (((l) > (r)) - ((l) < (r)))
+
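A sketch of a sort() comparison callback built on cmp_int(); unlike a naive subtraction, it cannot overflow:

    static int cmp_u64(const void *a, const void *b)
    {
        return cmp_int(*(const u64 *)a, *(const u64 *)b);
    }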
void sort_r(void *base, size_t num, size_t size,
cmp_r_func_t cmp_func,
swap_r_func_t swap_func,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index db46b25a65ae..bc0e1c275fc0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -414,6 +414,10 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
#define MIN_SWAPPINESS 0
#define MAX_SWAPPINESS 200
+
+/* Reclaim only from anon folios in proactive memory reclaim */
+#define SWAPPINESS_ANON_ONLY (MAX_SWAPPINESS + 1)
+
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
@@ -450,7 +454,7 @@ static inline unsigned long total_swapcache_pages(void)
}
void free_swap_cache(struct folio *folio);
-void free_page_and_swap_cache(struct page *);
+void free_folio_and_swap_cache(struct folio *folio);
void free_pages_and_swap_cache(struct encoded_page **, int);
/* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages;
@@ -520,10 +524,8 @@ static inline void put_swap_device(struct swap_info_struct *si)
#define si_swapinfo(val) \
do { (val)->freeswap = (val)->totalswap = 0; } while (0)
-/* only sparc can not include linux/pagemap.h in this file
- * so leave put_page and release_pages undeclared... */
-#define free_page_and_swap_cache(page) \
- put_page(page)
+#define free_folio_and_swap_cache(folio) \
+ folio_put(folio)
#define free_pages_and_swap_cache(pages, nr) \
release_pages((pages), (nr));
diff --git a/include/linux/turris-signing-key.h b/include/linux/turris-signing-key.h
new file mode 100644
index 000000000000..8a435b73c3a9
--- /dev/null
+++ b/include/linux/turris-signing-key.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * 2025 by Marek Behún <kabel@kernel.org>
+ */
+
+#ifndef __TURRIS_SIGNING_KEY_H
+#define __TURRIS_SIGNING_KEY_H
+
+#include <linux/key.h>
+#include <linux/types.h>
+
+struct device;
+
+#ifdef CONFIG_KEYS
+struct turris_signing_key_subtype {
+ u16 key_size;
+ u8 data_size;
+ u8 sig_size;
+ u8 public_key_size;
+ const char *hash_algo;
+ const void *(*get_public_key)(const struct key *key);
+ int (*sign)(const struct key *key, const void *msg, void *signature);
+};
+
+static inline struct device *turris_signing_key_get_dev(const struct key *key)
+{
+ return key->payload.data[1];
+}
+
+int devm_turris_signing_key_create(struct device *dev,
+ const struct turris_signing_key_subtype *subtype,
+ const char *desc);
+#endif
+
+#endif /* __TURRIS_SIGNING_KEY_H */
diff --git a/include/linux/types.h b/include/linux/types.h
index 49b79c8bb1a9..6dfdb8e8e4c3 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -136,6 +136,10 @@ typedef s64 ktime_t;
typedef u64 sector_t;
typedef u64 blkcnt_t;
+/* generic data direction definitions */
+#define READ 0
+#define WRITE 1
+
/*
* The type of an index into the pagecache.
*/
diff --git a/include/linux/unroll.h b/include/linux/unroll.h
index 863fb69f6a7e..186b71de740f 100644
--- a/include/linux/unroll.h
+++ b/include/linux/unroll.h
@@ -11,10 +11,8 @@
#ifdef CONFIG_CC_IS_CLANG
#define __pick_unrolled(x, y) _Pragma(#x)
-#elif CONFIG_GCC_VERSION >= 80000
-#define __pick_unrolled(x, y) _Pragma(#y)
#else
-#define __pick_unrolled(x, y) /* not supported */
+#define __pick_unrolled(x, y) _Pragma(#y)
#endif
/**
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 2e46b69ff0a6..516217c39094 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -188,13 +188,13 @@ struct uprobes_state {
};
extern void __init uprobes_init(void);
-extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
-extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr);
+extern int set_orig_insn(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr);
extern bool is_swbp_insn(uprobe_opcode_t *insn);
extern bool is_trap_insn(uprobe_opcode_t *insn);
extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
-extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
+extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t);
extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc);
extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool);
extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc);
diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h
index 3b570b765b75..9373962aade9 100644
--- a/include/linux/util_macros.h
+++ b/include/linux/util_macros.h
@@ -2,7 +2,10 @@
#ifndef _LINUX_HELPER_MACROS_H_
#define _LINUX_HELPER_MACROS_H_
+#include <linux/compiler_attributes.h>
#include <linux/math.h>
+#include <linux/typecheck.h>
+#include <linux/stddef.h>
/**
* for_each_if - helper for handling conditionals in various for_each macros
@@ -80,6 +83,72 @@
})
/**
+ * PTR_IF - evaluate to @ptr if @cond is true, or to NULL otherwise.
+ * @cond: A conditional, usually in a form of IS_ENABLED(CONFIG_FOO)
+ * @ptr: A pointer to assign if @cond is true.
+ *
+ * PTR_IF(IS_ENABLED(CONFIG_FOO), ptr) evaluates to @ptr if CONFIG_FOO is set
+ * to 'y' or 'm', or to NULL otherwise. The @ptr argument must be a pointer.
+ *
+ * The macro can be very useful to help the compiler drop dead code.
+ *
+ * For instance, consider the following::
+ *
+ * #ifdef CONFIG_FOO_SUSPEND
+ * static int foo_suspend(struct device *dev)
+ * {
+ * ...
+ * }
+ * #endif
+ *
+ * static struct pm_ops foo_ops = {
+ * #ifdef CONFIG_FOO_SUSPEND
+ * .suspend = foo_suspend,
+ * #endif
+ * };
+ *
+ * While this works, the foo_suspend() function is compiled conditionally,
+ * only when CONFIG_FOO_SUSPEND is set. This is problematic: if there were
+ * a build bug in this function, we would have no way of knowing unless
+ * the configuration option is set.
+ *
+ * An alternative is to declare foo_suspend() always, but mark it
+ * as __maybe_unused. This works, but the __maybe_unused attribute
+ * is required to instruct the compiler that the function may not
+ * be referenced anywhere, and is safe to remove without making
+ * a fuss about it. This makes the programmer responsible for tagging
+ * the functions that can be garbage-collected.
+ *
+ * With the macro it is possible to write the following:
+ *
+ * static int foo_suspend(struct device *dev)
+ * {
+ * ...
+ * }
+ *
+ * static struct pm_ops foo_ops = {
+ * .suspend = PTR_IF(IS_ENABLED(CONFIG_FOO_SUSPEND), foo_suspend),
+ * };
+ *
+ * The foo_suspend() function will now be automatically dropped by the
+ * compiler, and it does not require any specific attribute.
+ */
+#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL)
+
+/**
+ * u64_to_user_ptr - cast a pointer passed as u64 from user space to void __user *
+ * @x: The u64 value from user space, usually via IOCTL
+ *
+ * u64_to_user_ptr() casts a pointer passed as u64 from user space to void
+ * __user * correctly. Using this lets us get rid of all the tiresome casts.
+ */
+#define u64_to_user_ptr(x) \
+({ \
+ typecheck(u64, (x)); \
+ (void __user *)(uintptr_t)(x); \
+})
+
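A usage sketch from an ioctl handler (struct my_args and the argp/buf/len variables are illustrative assumptions):

    struct my_args args; /* args.data_ptr is a u64 supplied by user space */

    if (copy_from_user(&args, argp, sizeof(args)))
        return -EFAULT;
    if (copy_to_user(u64_to_user_ptr(args.data_ptr), buf, len))
        return -EFAULT;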
+/**
* is_insidevar - check if the @ptr points inside the @var memory range.
* @ptr: the pointer to a memory address.
* @var: the variable which address and size identify the memory range.
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 78eede109b1a..be850174e802 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -965,10 +965,12 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id,
* Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
* in xa_init_flags().
*
+ * Note that callers interested in whether wrapping has occurred should
+ * use __xa_alloc_cyclic() instead.
+ *
* Context: Any context. Takes and releases the xa_lock. May sleep if
* the @gfp flags permit.
- * Return: 0 if the allocation succeeded without wrapping. 1 if the
- * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be
* allocated or -EBUSY if there are no free entries in @limit.
*/
static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
@@ -981,7 +983,7 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
xa_unlock(xa);
- return err;
+ return err < 0 ? err : 0;
}
/**
@@ -1002,10 +1004,12 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
* Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
* in xa_init_flags().
*
+ * Note that callers interested in whether wrapping has occurred should
+ * use __xa_alloc_cyclic() instead.
+ *
* Context: Any context. Takes and releases the xa_lock while
* disabling softirqs. May sleep if the @gfp flags permit.
- * Return: 0 if the allocation succeeded without wrapping. 1 if the
- * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be
* allocated or -EBUSY if there are no free entries in @limit.
*/
static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
@@ -1018,7 +1022,7 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
xa_unlock_bh(xa);
- return err;
+ return err < 0 ? err : 0;
}
/**
@@ -1039,10 +1043,12 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
* Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set
* in xa_init_flags().
*
+ * Note that callers interested in whether wrapping has occurred should
+ * use __xa_alloc_cyclic() instead.
+ *
* Context: Process context. Takes and releases the xa_lock while
* disabling interrupts. May sleep if the @gfp flags permit.
- * Return: 0 if the allocation succeeded without wrapping. 1 if the
- * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be
* allocated or -EBUSY if there are no free entries in @limit.
*/
static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry,
@@ -1055,7 +1061,7 @@ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry,
err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
xa_unlock_irq(xa);
- return err;
+ return err < 0 ? err : 0;
}
/**
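Callers that do want to detect wrap-around can open-code the wrapper and look at the raw __xa_alloc_cyclic() return value; a minimal sketch, mirroring the locking of xa_alloc_cyclic() above (identifier names are illustrative):

	static DEFINE_XARRAY_ALLOC(session_ids);
	static u32 session_next;

	static int session_assign_id(void *session, u32 *id)
	{
		int err;

		xa_lock(&session_ids);
		err = __xa_alloc_cyclic(&session_ids, id, session, xa_limit_32b,
					&session_next, GFP_KERNEL);
		xa_unlock(&session_ids);

		if (err == 1)	/* succeeded, but the ID space wrapped */
			pr_info("session id space wrapped\n");

		return err < 0 ? err : 0;
	}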
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 52f30e526607..369ef068fad8 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -22,7 +22,7 @@ const char *zpool_get_type(struct zpool *pool);
void zpool_destroy_pool(struct zpool *pool);
int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
- unsigned long *handle);
+ unsigned long *handle, const int nid);
void zpool_free(struct zpool *pool, unsigned long handle);
@@ -64,7 +64,7 @@ struct zpool_driver {
void (*destroy)(void *pool);
int (*malloc)(void *pool, size_t size, gfp_t gfp,
- unsigned long *handle);
+ unsigned long *handle, const int nid);
void (*free)(void *pool, unsigned long handle);
void *(*obj_read_begin)(void *pool, unsigned long handle,
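A sketch of how a zpool caller might use the new node parameter, preferring backing memory on the node of the page being compressed (the function name is illustrative and the write path is elided; only zpool_malloc() and zpool_free() from the hunk above are used):

	static int example_store_compressed(struct zpool *pool, struct page *page,
					    size_t len)
	{
		unsigned long handle;
		int err;

		/* allocate backing space on the source page's NUMA node */
		err = zpool_malloc(pool, len, GFP_NOWAIT | __GFP_NORETRY,
				   &handle, page_to_nid(page));
		if (err)
			return err;

		/* write path elided here; release the handle when done */
		zpool_free(pool, handle);
		return 0;
	}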
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index c26baf9fb331..13e9cc5490f7 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -26,7 +26,8 @@ struct zs_pool;
struct zs_pool *zs_create_pool(const char *name);
void zs_destroy_pool(struct zs_pool *pool);
-unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags);
+unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags,
+ const int nid);
void zs_free(struct zs_pool *pool, unsigned long obj);
size_t zs_huge_class_size(struct zs_pool *pool);
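Callers with no placement preference can presumably pass NUMA_NO_NODE; a minimal sketch under that assumption:

	#include <linux/numa.h>

	static unsigned long example_alloc_any_node(struct zs_pool *pool, size_t size)
	{
		/* NUMA_NO_NODE: let zsmalloc choose the node (assumed semantics) */
		return zs_malloc(pool, size, GFP_KERNEL, NUMA_NO_NODE);
	}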
diff --git a/include/soc/qcom/qcom-spmi-pmic.h b/include/soc/qcom/qcom-spmi-pmic.h
index a62d500a6fda..df3d3a0af98a 100644
--- a/include/soc/qcom/qcom-spmi-pmic.h
+++ b/include/soc/qcom/qcom-spmi-pmic.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (c) 2022 Linaro. All rights reserved.
- * Author: Caleb Connolly <caleb.connolly@linaro.org>
+ * Author: Casey Connolly <casey.connolly@linaro.org>
*/
#ifndef __QCOM_SPMI_PMIC_H__
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index 9d5c00b0285c..2305df6cb485 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -55,10 +55,10 @@ SCAN_STATUS
TRACE_EVENT(mm_khugepaged_scan_pmd,
- TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
+ TP_PROTO(struct mm_struct *mm, struct folio *folio, bool writable,
int referenced, int none_or_zero, int status, int unmapped),
- TP_ARGS(mm, page, writable, referenced, none_or_zero, status, unmapped),
+ TP_ARGS(mm, folio, writable, referenced, none_or_zero, status, unmapped),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
@@ -72,7 +72,7 @@ TRACE_EVENT(mm_khugepaged_scan_pmd,
TP_fast_assign(
__entry->mm = mm;
- __entry->pfn = page ? page_to_pfn(page) : -1;
+ __entry->pfn = folio ? folio_pfn(folio) : -1;
__entry->writable = writable;
__entry->referenced = referenced;
__entry->none_or_zero = none_or_zero;
@@ -116,10 +116,10 @@ TRACE_EVENT(mm_collapse_huge_page,
TRACE_EVENT(mm_collapse_huge_page_isolate,
- TP_PROTO(struct page *page, int none_or_zero,
+ TP_PROTO(struct folio *folio, int none_or_zero,
int referenced, bool writable, int status),
- TP_ARGS(page, none_or_zero, referenced, writable, status),
+ TP_ARGS(folio, none_or_zero, referenced, writable, status),
TP_STRUCT__entry(
__field(unsigned long, pfn)
@@ -130,7 +130,7 @@ TRACE_EVENT(mm_collapse_huge_page_isolate,
),
TP_fast_assign(
- __entry->pfn = page ? page_to_pfn(page) : -1;
+ __entry->pfn = folio ? folio_pfn(folio) : -1;
__entry->none_or_zero = none_or_zero;
__entry->referenced = referenced;
__entry->writable = writable;
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 15aae955a10b..aa441f593e9a 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -172,9 +172,7 @@ IF_HAVE_PG_ARCH_3(arch_3)
__def_pageflag_names \
) : "none"
-#if defined(CONFIG_X86)
-#define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" }
-#elif defined(CONFIG_PPC64)
+#if defined(CONFIG_PPC64)
#define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" }
#elif defined(CONFIG_PARISC)
#define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" }
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0c7545b4ffbc..4e6b2910cec3 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -771,6 +771,39 @@ TRACE_EVENT(sched_skip_vma_numa,
__entry->vm_end,
__print_symbolic(__entry->reason, NUMAB_SKIP_REASON))
);
+
+TRACE_EVENT(sched_skip_cpuset_numa,
+
+ TP_PROTO(struct task_struct *tsk, nodemask_t *mem_allowed_ptr),
+
+ TP_ARGS(tsk, mem_allowed_ptr),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( pid_t, pid )
+ __field( pid_t, tgid )
+ __field( pid_t, ngid )
+ __array( unsigned long, mem_allowed, BITS_TO_LONGS(MAX_NUMNODES))
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = task_pid_nr(tsk);
+ __entry->tgid = task_tgid_nr(tsk);
+ __entry->ngid = task_numa_group_id(tsk);
+ BUILD_BUG_ON(sizeof(nodemask_t) != \
+ BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long));
+ memcpy(__entry->mem_allowed, mem_allowed_ptr->bits,
+ sizeof(__entry->mem_allowed));
+ ),
+
+ TP_printk("comm=%s pid=%d tgid=%d ngid=%d mem_nodes_allowed=%*pbl",
+ __entry->comm,
+ __entry->pid,
+ __entry->tgid,
+ __entry->ngid,
+ MAX_NUMNODES, __entry->mem_allowed)
+);
#endif /* CONFIG_NUMA_BALANCING */
/*
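The new tracepoint would be fired from the NUMA balancing path roughly as follows (a sketch only; the enclosing function and the exact skip condition are illustrative, not taken from this patch):

	static bool numa_scan_allowed(struct task_struct *p)
	{
		/* a single allowed node means no page could migrate anyway */
		if (nodes_weight(p->mems_allowed) == 1) {
			trace_sched_skip_cpuset_numa(p, &p->mems_allowed);
			return false;
		}
		return true;
	}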
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index e762e1af650c..0098b0ce8ccb 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
+#define PAGE_IS_GUARD (1 << 8)
/*
* struct page_region - Page region with flags
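From user space, the new category can be requested through the existing PAGEMAP_SCAN ioctl; a sketch, assuming a kernel that reports PAGE_IS_GUARD (error handling elided):

	#include <sys/ioctl.h>
	#include <linux/fs.h>

	static int scan_guard_regions(int pagemap_fd, unsigned long start,
				      unsigned long end,
				      struct page_region *vec, unsigned long vec_len)
	{
		struct pm_scan_arg arg = {
			.size = sizeof(arg),
			.start = start,
			.end = end,
			.vec = (unsigned long)vec,
			.vec_len = vec_len,
			/* report only pages covered by guard regions */
			.category_mask = PAGE_IS_GUARD,
			.return_mask = PAGE_IS_GUARD,
		};

		/* returns the number of filled page_region entries, or -1 on error */
		return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
	}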
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 72c038fc71d0..5f8ef6156752 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -74,6 +74,7 @@ struct seccomp_metadata {
};
#define PTRACE_GET_SYSCALL_INFO 0x420e
+#define PTRACE_SET_SYSCALL_INFO 0x4212
#define PTRACE_SYSCALL_INFO_NONE 0
#define PTRACE_SYSCALL_INFO_ENTRY 1
#define PTRACE_SYSCALL_INFO_EXIT 2
@@ -81,7 +82,8 @@ struct seccomp_metadata {
struct ptrace_syscall_info {
__u8 op; /* PTRACE_SYSCALL_INFO_* */
- __u8 pad[3];
+ __u8 reserved;
+ __u16 flags;
__u32 arch;
__u64 instruction_pointer;
__u64 stack_pointer;
@@ -98,6 +100,7 @@ struct ptrace_syscall_info {
__u64 nr;
__u64 args[6];
__u32 ret_data;
+ __u32 reserved2;
} seccomp;
};
};
@@ -142,6 +145,8 @@ struct ptrace_sud_config {
__u64 len;
};
+/* 0x4212 is PTRACE_SET_SYSCALL_INFO, defined above */
+
/*
* These values are stored in task->ptrace_message
* by ptrace_stop to describe the current syscall-stop.
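A user-space tracer sketch combining the old and new requests (the tracee is assumed to be stopped at a syscall-entry stop; error handling and the glibc/uapi header interplay are glossed over):

	#include <sys/ptrace.h>
	#include <sys/syscall.h>
	#include <linux/ptrace.h>

	/* rewrite the pending syscall to getpid at a syscall-entry stop */
	static void neuter_syscall(pid_t pid)
	{
		struct ptrace_syscall_info info = { 0 };

		ptrace(PTRACE_GET_SYSCALL_INFO, pid, sizeof(info), &info);
		if (info.op == PTRACE_SYSCALL_INFO_ENTRY) {
			info.entry.nr = SYS_getpid;
			ptrace(PTRACE_SET_SYSCALL_INFO, pid, sizeof(info), &info);
		}
	}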
diff --git a/include/video/mach64.h b/include/video/mach64.h
index d96e3c189634..f1709f7c8421 100644
--- a/include/video/mach64.h
+++ b/include/video/mach64.h
@@ -934,9 +934,6 @@
#define MEM_BNDRY_EN 0x00040000
#define ONE_MB 0x100000
-/* ATI PCI constants */
-#define PCI_ATI_VENDOR_ID 0x1002
-
/* CNFG_CHIP_ID register constants */
#define CFG_CHIP_TYPE 0x0000FFFF