summary refs log tree commit diff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/asm-generic/bug.h 80
-rw-r--r-- include/asm-generic/thread_info_tif.h 3
-rw-r--r-- include/crypto/blake2b.h 143
-rw-r--r-- include/crypto/blake2s.h 126
-rw-r--r-- include/crypto/chacha.h 12
-rw-r--r-- include/crypto/chacha20poly1305.h 19
-rw-r--r-- include/crypto/curve25519.h 24
-rw-r--r-- include/crypto/internal/blake2b.h 101
-rw-r--r-- include/crypto/md5.h 11
-rw-r--r-- include/crypto/poly1305.h 2
-rw-r--r-- include/crypto/polyval.h 182
-rw-r--r-- include/crypto/sha1.h 12
-rw-r--r-- include/crypto/sha2.h 53
-rw-r--r-- include/crypto/sha3.h 320
-rw-r--r-- include/linux/acpi.h 49
-rw-r--r-- include/linux/arch_topology.h 17
-rw-r--r-- include/linux/arm_mpam.h 66
-rw-r--r-- include/linux/bitmap.h 15
-rw-r--r-- include/linux/bug.h 8
-rw-r--r-- include/linux/byteorder/generic.h 16
-rw-r--r-- include/linux/cc_platform.h 2
-rw-r--r-- include/linux/cleanup.h 30
-rw-r--r-- include/linux/compiler_types.h 15
-rw-r--r-- include/linux/cpuidle.h 6
-rw-r--r-- include/linux/cpumask.h 28
-rw-r--r-- include/linux/delay.h 8
-rw-r--r-- include/linux/devfreq-governor.h 102
-rw-r--r-- include/linux/efi.h 2
-rw-r--r-- include/linux/energy_model.h 4
-rw-r--r-- include/linux/entry-common.h 38
-rw-r--r-- include/linux/freezer.h 12
-rw-r--r-- include/linux/huge_mm.h 2
-rw-r--r-- include/linux/intel_rapl.h 2
-rw-r--r-- include/linux/interrupt.h 25
-rw-r--r-- include/linux/irq-entry-common.h 75
-rw-r--r-- include/linux/irq.h 5
-rw-r--r-- include/linux/irq_work.h 9
-rw-r--r-- include/linux/irq_work_types.h 14
-rw-r--r-- include/linux/irqchip.h 8
-rw-r--r-- include/linux/irqchip/irq-partition-percpu.h 53
-rw-r--r-- include/linux/irqdesc.h 1
-rw-r--r-- include/linux/irqdomain.h 33
-rw-r--r-- include/linux/kvm_types.h 14
-rw-r--r-- include/linux/lockdep.h 2
-rw-r--r-- include/linux/memory.h 9
-rw-r--r-- include/linux/memory_hotplug.h 18
-rw-r--r-- include/linux/memremap.h 1
-rw-r--r-- include/linux/mm.h 25
-rw-r--r-- include/linux/mm_types.h 128
-rw-r--r-- include/linux/msi.h 3
-rw-r--r-- include/linux/of_irq.h 7
-rw-r--r-- include/linux/percpu-defs.h 2
-rw-r--r-- include/linux/perf/arm_pmu.h 7
-rw-r--r-- include/linux/pgtable.h 4
-rw-r--r-- include/linux/platform_device.h 3
-rw-r--r-- include/linux/pm.h 8
-rw-r--r-- include/linux/pm_domain.h 1
-rw-r--r-- include/linux/pm_qos.h 9
-rw-r--r-- include/linux/pm_runtime.h 24
-rw-r--r-- include/linux/prandom.h 6
-rw-r--r-- include/linux/preempt.h 2
-rw-r--r-- include/linux/prmt.h 2
-rw-r--r-- include/linux/random.h 15
-rw-r--r-- include/linux/resctrl.h 24
-rw-r--r-- include/linux/restart_block.h 2
-rw-r--r-- include/linux/resume_user_mode.h 2
-rw-r--r-- include/linux/rseq.h 214
-rw-r--r-- include/linux/rseq_entry.h 616
-rw-r--r-- include/linux/rseq_types.h 164
-rw-r--r-- include/linux/sched.h 58
-rw-r--r-- include/linux/thread_info.h 5
-rw-r--r-- include/linux/timer.h 9
-rw-r--r-- include/linux/uaccess.h 314
-rw-r--r-- include/trace/events/power.h 3
-rw-r--r-- include/trace/events/rseq.h 4
-rw-r--r-- include/trace/events/timer_migration.h 4
-rw-r--r-- include/uapi/linux/energy_model.h 62
-rw-r--r-- include/uapi/linux/perf_event.h 2
-rw-r--r-- include/uapi/linux/rseq.h 21
79 files changed, 2769 insertions, 758 deletions
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 387720933973..09e8eccee8ed 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -13,10 +13,19 @@
#define BUGFLAG_ONCE (1 << 1)
#define BUGFLAG_DONE (1 << 2)
#define BUGFLAG_NO_CUT_HERE (1 << 3) /* CUT_HERE already sent */
+#define BUGFLAG_ARGS (1 << 4)
#define BUGFLAG_TAINT(taint) ((taint) << 8)
#define BUG_GET_TAINT(bug) ((bug)->flags >> 8)
#endif
+#ifndef WARN_CONDITION_STR
+#ifdef CONFIG_DEBUG_BUGVERBOSE_DETAILED
+# define WARN_CONDITION_STR(cond_str) "[" cond_str "] "
+#else
+# define WARN_CONDITION_STR(cond_str)
+#endif
+#endif /* WARN_CONDITION_STR */
+
#ifndef __ASSEMBLY__
#include <linux/panic.h>
#include <linux/printk.h>
@@ -29,19 +38,20 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
#ifdef CONFIG_BUG
-#ifdef CONFIG_GENERIC_BUG
-struct bug_entry {
#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
- unsigned long bug_addr;
+#define BUG_REL(type, name) type name
#else
- signed int bug_addr_disp;
+#define BUG_REL(type, name) signed int name##_disp
#endif
-#ifdef CONFIG_DEBUG_BUGVERBOSE
-#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
- const char *file;
-#else
- signed int file_disp;
+
+#ifdef CONFIG_GENERIC_BUG
+struct bug_entry {
+ BUG_REL(unsigned long, bug_addr);
+#ifdef HAVE_ARCH_BUG_FORMAT
+ BUG_REL(const char *, format);
#endif
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+ BUG_REL(const char *, file);
unsigned short line;
#endif
unsigned short flags;
@@ -92,28 +102,50 @@ void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
const char *fmt, ...);
extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
-#ifndef __WARN_FLAGS
-#define __WARN() __WARN_printf(TAINT_WARN, NULL)
+#ifdef __WARN_FLAGS
+#define __WARN() __WARN_FLAGS("", BUGFLAG_TAINT(TAINT_WARN))
+
+#ifndef WARN_ON
+#define WARN_ON(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_FLAGS(#condition, \
+ BUGFLAG_TAINT(TAINT_WARN)); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+
+#ifndef WARN_ON_ONCE
+#define WARN_ON_ONCE(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_FLAGS(#condition, \
+ BUGFLAG_ONCE | \
+ BUGFLAG_TAINT(TAINT_WARN)); \
+ unlikely(__ret_warn_on); \
+})
+#endif
+#endif /* __WARN_FLAGS */
+
+#if defined(__WARN_FLAGS) && !defined(__WARN_printf)
#define __WARN_printf(taint, arg...) do { \
instrumentation_begin(); \
- warn_slowpath_fmt(__FILE__, __LINE__, taint, arg); \
+ __warn_printk(arg); \
+ __WARN_FLAGS("", BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\
instrumentation_end(); \
} while (0)
-#else
-#define __WARN() __WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))
+#endif
+
+#ifndef __WARN_printf
#define __WARN_printf(taint, arg...) do { \
instrumentation_begin(); \
- __warn_printk(arg); \
- __WARN_FLAGS(BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\
+ warn_slowpath_fmt(__FILE__, __LINE__, taint, arg); \
instrumentation_end(); \
} while (0)
-#define WARN_ON_ONCE(condition) ({ \
- int __ret_warn_on = !!(condition); \
- if (unlikely(__ret_warn_on)) \
- __WARN_FLAGS(BUGFLAG_ONCE | \
- BUGFLAG_TAINT(TAINT_WARN)); \
- unlikely(__ret_warn_on); \
-})
+#endif
+
+#ifndef __WARN
+#define __WARN() __WARN_printf(TAINT_WARN, NULL)
#endif
/* used internally by panic.c */
@@ -148,8 +180,10 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
DO_ONCE_LITE_IF(condition, WARN_ON, 1)
#endif
+#ifndef WARN_ONCE
#define WARN_ONCE(condition, format...) \
DO_ONCE_LITE_IF(condition, WARN, 1, format)
+#endif
#define WARN_TAINT_ONCE(condition, taint, format...) \
DO_ONCE_LITE_IF(condition, WARN_TAINT, 1, taint, format)
diff --git a/include/asm-generic/thread_info_tif.h b/include/asm-generic/thread_info_tif.h
index ee3793e9b1a4..da1610a78f92 100644
--- a/include/asm-generic/thread_info_tif.h
+++ b/include/asm-generic/thread_info_tif.h
@@ -45,4 +45,7 @@
# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
#endif
+#define TIF_RSEQ 11 // Run RSEQ fast path
+#define _TIF_RSEQ BIT(TIF_RSEQ)
+
#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h
index dd7694477e50..3bc37fd103a7 100644
--- a/include/crypto/blake2b.h
+++ b/include/crypto/blake2b.h
@@ -7,20 +7,10 @@
#include <linux/types.h>
#include <linux/string.h>
-struct blake2b_state {
- /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
- u64 h[8];
- u64 t[2];
- /* The true state ends here. The rest is temporary storage. */
- u64 f[2];
-};
-
enum blake2b_lengths {
BLAKE2B_BLOCK_SIZE = 128,
BLAKE2B_HASH_SIZE = 64,
BLAKE2B_KEY_SIZE = 64,
- BLAKE2B_STATE_SIZE = offsetof(struct blake2b_state, f),
- BLAKE2B_DESC_SIZE = sizeof(struct blake2b_state),
BLAKE2B_160_HASH_SIZE = 20,
BLAKE2B_256_HASH_SIZE = 32,
@@ -28,6 +18,25 @@ enum blake2b_lengths {
BLAKE2B_512_HASH_SIZE = 64,
};
+/**
+ * struct blake2b_ctx - Context for hashing a message with BLAKE2b
+ * @h: compression function state
+ * @t: block counter
+ * @f: finalization indicator
+ * @buf: partial block buffer; 'buflen' bytes are valid
+ * @buflen: number of bytes buffered in @buf
+ * @outlen: length of output hash value in bytes, at most BLAKE2B_HASH_SIZE
+ */
+struct blake2b_ctx {
+ /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
+ u64 h[8];
+ u64 t[2];
+ u64 f[2];
+ u8 buf[BLAKE2B_BLOCK_SIZE];
+ unsigned int buflen;
+ unsigned int outlen;
+};
+
enum blake2b_iv {
BLAKE2B_IV0 = 0x6A09E667F3BCC908ULL,
BLAKE2B_IV1 = 0xBB67AE8584CAA73BULL,
@@ -39,19 +48,109 @@ enum blake2b_iv {
BLAKE2B_IV7 = 0x5BE0CD19137E2179ULL,
};
-static inline void __blake2b_init(struct blake2b_state *state, size_t outlen,
- size_t keylen)
+static inline void __blake2b_init(struct blake2b_ctx *ctx, size_t outlen,
+ const void *key, size_t keylen)
+{
+ ctx->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen);
+ ctx->h[1] = BLAKE2B_IV1;
+ ctx->h[2] = BLAKE2B_IV2;
+ ctx->h[3] = BLAKE2B_IV3;
+ ctx->h[4] = BLAKE2B_IV4;
+ ctx->h[5] = BLAKE2B_IV5;
+ ctx->h[6] = BLAKE2B_IV6;
+ ctx->h[7] = BLAKE2B_IV7;
+ ctx->t[0] = 0;
+ ctx->t[1] = 0;
+ ctx->f[0] = 0;
+ ctx->f[1] = 0;
+ ctx->buflen = 0;
+ ctx->outlen = outlen;
+ if (keylen) {
+ memcpy(ctx->buf, key, keylen);
+ memset(&ctx->buf[keylen], 0, BLAKE2B_BLOCK_SIZE - keylen);
+ ctx->buflen = BLAKE2B_BLOCK_SIZE;
+ }
+}
+
+/**
+ * blake2b_init() - Initialize a BLAKE2b context for a new message (unkeyed)
+ * @ctx: the context to initialize
+ * @outlen: length of output hash value in bytes, at most BLAKE2B_HASH_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2b_init(struct blake2b_ctx *ctx, size_t outlen)
{
- state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen);
- state->h[1] = BLAKE2B_IV1;
- state->h[2] = BLAKE2B_IV2;
- state->h[3] = BLAKE2B_IV3;
- state->h[4] = BLAKE2B_IV4;
- state->h[5] = BLAKE2B_IV5;
- state->h[6] = BLAKE2B_IV6;
- state->h[7] = BLAKE2B_IV7;
- state->t[0] = 0;
- state->t[1] = 0;
+ __blake2b_init(ctx, outlen, NULL, 0);
+}
+
+/**
+ * blake2b_init_key() - Initialize a BLAKE2b context for a new message (keyed)
+ * @ctx: the context to initialize
+ * @outlen: length of output hash value in bytes, at most BLAKE2B_HASH_SIZE
+ * @key: the key
+ * @keylen: the key length in bytes, at most BLAKE2B_KEY_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2b_init_key(struct blake2b_ctx *ctx, size_t outlen,
+ const void *key, size_t keylen)
+{
+ WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2B_HASH_SIZE ||
+ !key || !keylen || keylen > BLAKE2B_KEY_SIZE));
+
+ __blake2b_init(ctx, outlen, key, keylen);
+}
+
+/**
+ * blake2b_update() - Update a BLAKE2b context with message data
+ * @ctx: the context to update; must have been initialized
+ * @in: the message data
+ * @inlen: the data length in bytes
+ *
+ * This can be called any number of times.
+ *
+ * Context: Any context.
+ */
+void blake2b_update(struct blake2b_ctx *ctx, const u8 *in, size_t inlen);
+
+/**
+ * blake2b_final() - Finish computing a BLAKE2b hash
+ * @ctx: the context to finalize; must have been initialized
+ * @out: (output) the resulting BLAKE2b hash. Its length will be equal to the
+ * @outlen that was passed to blake2b_init() or blake2b_init_key().
+ *
+ * After finishing, this zeroizes @ctx. So the caller does not need to do it.
+ *
+ * Context: Any context.
+ */
+void blake2b_final(struct blake2b_ctx *ctx, u8 *out);
+
+/**
+ * blake2b() - Compute BLAKE2b hash in one shot
+ * @key: the key, or NULL for an unkeyed hash
+ * @keylen: the key length in bytes (at most BLAKE2B_KEY_SIZE), or 0 for an
+ * unkeyed hash
+ * @in: the message data
+ * @inlen: the data length in bytes
+ * @out: (output) the resulting BLAKE2b hash, with length @outlen
+ * @outlen: length of output hash value in bytes, at most BLAKE2B_HASH_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2b(const u8 *key, size_t keylen,
+ const u8 *in, size_t inlen,
+ u8 *out, size_t outlen)
+{
+ struct blake2b_ctx ctx;
+
+ WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
+ outlen > BLAKE2B_HASH_SIZE || keylen > BLAKE2B_KEY_SIZE ||
+ (!key && keylen)));
+
+ __blake2b_init(&ctx, outlen, key, keylen);
+ blake2b_update(&ctx, in, inlen);
+ blake2b_final(&ctx, out);
}
#endif /* _CRYPTO_BLAKE2B_H */
diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
index f9ffd39194eb..648cb7824358 100644
--- a/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
@@ -22,7 +22,16 @@ enum blake2s_lengths {
BLAKE2S_256_HASH_SIZE = 32,
};
-struct blake2s_state {
+/**
+ * struct blake2s_ctx - Context for hashing a message with BLAKE2s
+ * @h: compression function state
+ * @t: block counter
+ * @f: finalization indicator
+ * @buf: partial block buffer; 'buflen' bytes are valid
+ * @buflen: number of bytes buffered in @buf
+ * @outlen: length of output hash value in bytes, at most BLAKE2S_HASH_SIZE
+ */
+struct blake2s_ctx {
/* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
u32 h[8];
u32 t[2];
@@ -43,62 +52,109 @@ enum blake2s_iv {
BLAKE2S_IV7 = 0x5BE0CD19UL,
};
-static inline void __blake2s_init(struct blake2s_state *state, size_t outlen,
+static inline void __blake2s_init(struct blake2s_ctx *ctx, size_t outlen,
const void *key, size_t keylen)
{
- state->h[0] = BLAKE2S_IV0 ^ (0x01010000 | keylen << 8 | outlen);
- state->h[1] = BLAKE2S_IV1;
- state->h[2] = BLAKE2S_IV2;
- state->h[3] = BLAKE2S_IV3;
- state->h[4] = BLAKE2S_IV4;
- state->h[5] = BLAKE2S_IV5;
- state->h[6] = BLAKE2S_IV6;
- state->h[7] = BLAKE2S_IV7;
- state->t[0] = 0;
- state->t[1] = 0;
- state->f[0] = 0;
- state->f[1] = 0;
- state->buflen = 0;
- state->outlen = outlen;
+ ctx->h[0] = BLAKE2S_IV0 ^ (0x01010000 | keylen << 8 | outlen);
+ ctx->h[1] = BLAKE2S_IV1;
+ ctx->h[2] = BLAKE2S_IV2;
+ ctx->h[3] = BLAKE2S_IV3;
+ ctx->h[4] = BLAKE2S_IV4;
+ ctx->h[5] = BLAKE2S_IV5;
+ ctx->h[6] = BLAKE2S_IV6;
+ ctx->h[7] = BLAKE2S_IV7;
+ ctx->t[0] = 0;
+ ctx->t[1] = 0;
+ ctx->f[0] = 0;
+ ctx->f[1] = 0;
+ ctx->buflen = 0;
+ ctx->outlen = outlen;
if (keylen) {
- memcpy(state->buf, key, keylen);
- memset(&state->buf[keylen], 0, BLAKE2S_BLOCK_SIZE - keylen);
- state->buflen = BLAKE2S_BLOCK_SIZE;
+ memcpy(ctx->buf, key, keylen);
+ memset(&ctx->buf[keylen], 0, BLAKE2S_BLOCK_SIZE - keylen);
+ ctx->buflen = BLAKE2S_BLOCK_SIZE;
}
}
-static inline void blake2s_init(struct blake2s_state *state,
- const size_t outlen)
+/**
+ * blake2s_init() - Initialize a BLAKE2s context for a new message (unkeyed)
+ * @ctx: the context to initialize
+ * @outlen: length of output hash value in bytes, at most BLAKE2S_HASH_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2s_init(struct blake2s_ctx *ctx, size_t outlen)
{
- __blake2s_init(state, outlen, NULL, 0);
+ __blake2s_init(ctx, outlen, NULL, 0);
}
-static inline void blake2s_init_key(struct blake2s_state *state,
- const size_t outlen, const void *key,
- const size_t keylen)
+/**
+ * blake2s_init_key() - Initialize a BLAKE2s context for a new message (keyed)
+ * @ctx: the context to initialize
+ * @outlen: length of output hash value in bytes, at most BLAKE2S_HASH_SIZE
+ * @key: the key
+ * @keylen: the key length in bytes, at most BLAKE2S_KEY_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2s_init_key(struct blake2s_ctx *ctx, size_t outlen,
+ const void *key, size_t keylen)
{
WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
!key || !keylen || keylen > BLAKE2S_KEY_SIZE));
- __blake2s_init(state, outlen, key, keylen);
+ __blake2s_init(ctx, outlen, key, keylen);
}
-void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
-void blake2s_final(struct blake2s_state *state, u8 *out);
+/**
+ * blake2s_update() - Update a BLAKE2s context with message data
+ * @ctx: the context to update; must have been initialized
+ * @in: the message data
+ * @inlen: the data length in bytes
+ *
+ * This can be called any number of times.
+ *
+ * Context: Any context.
+ */
+void blake2s_update(struct blake2s_ctx *ctx, const u8 *in, size_t inlen);
-static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
- const size_t outlen, const size_t inlen,
- const size_t keylen)
+/**
+ * blake2s_final() - Finish computing a BLAKE2s hash
+ * @ctx: the context to finalize; must have been initialized
+ * @out: (output) the resulting BLAKE2s hash. Its length will be equal to the
+ * @outlen that was passed to blake2s_init() or blake2s_init_key().
+ *
+ * After finishing, this zeroizes @ctx. So the caller does not need to do it.
+ *
+ * Context: Any context.
+ */
+void blake2s_final(struct blake2s_ctx *ctx, u8 *out);
+
+/**
+ * blake2s() - Compute BLAKE2s hash in one shot
+ * @key: the key, or NULL for an unkeyed hash
+ * @keylen: the key length in bytes (at most BLAKE2S_KEY_SIZE), or 0 for an
+ * unkeyed hash
+ * @in: the message data
+ * @inlen: the data length in bytes
+ * @out: (output) the resulting BLAKE2s hash, with length @outlen
+ * @outlen: length of output hash value in bytes, at most BLAKE2S_HASH_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2s(const u8 *key, size_t keylen,
+ const u8 *in, size_t inlen,
+ u8 *out, size_t outlen)
{
- struct blake2s_state state;
+ struct blake2s_ctx ctx;
WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
(!key && keylen)));
- __blake2s_init(&state, outlen, key, keylen);
- blake2s_update(&state, in, inlen);
- blake2s_final(&state, out);
+ __blake2s_init(&ctx, outlen, key, keylen);
+ blake2s_update(&ctx, in, inlen);
+ blake2s_final(&ctx, out);
}
#endif /* _CRYPTO_BLAKE2S_H */
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
index 38e26dff27b0..1cc301a48469 100644
--- a/include/crypto/chacha.h
+++ b/include/crypto/chacha.h
@@ -38,18 +38,18 @@ struct chacha_state {
};
void chacha_block_generic(struct chacha_state *state,
- u8 out[CHACHA_BLOCK_SIZE], int nrounds);
+ u8 out[at_least CHACHA_BLOCK_SIZE], int nrounds);
static inline void chacha20_block(struct chacha_state *state,
- u8 out[CHACHA_BLOCK_SIZE])
+ u8 out[at_least CHACHA_BLOCK_SIZE])
{
chacha_block_generic(state, out, 20);
}
void hchacha_block_generic(const struct chacha_state *state,
- u32 out[HCHACHA_OUT_WORDS], int nrounds);
+ u32 out[at_least HCHACHA_OUT_WORDS], int nrounds);
void hchacha_block(const struct chacha_state *state,
- u32 out[HCHACHA_OUT_WORDS], int nrounds);
+ u32 out[at_least HCHACHA_OUT_WORDS], int nrounds);
enum chacha_constants { /* expand 32-byte k */
CHACHA_CONSTANT_EXPA = 0x61707865U,
@@ -67,8 +67,8 @@ static inline void chacha_init_consts(struct chacha_state *state)
}
static inline void chacha_init(struct chacha_state *state,
- const u32 key[CHACHA_KEY_WORDS],
- const u8 iv[CHACHA_IV_SIZE])
+ const u32 key[at_least CHACHA_KEY_WORDS],
+ const u8 iv[at_least CHACHA_IV_SIZE])
{
chacha_init_consts(state);
state->x[4] = key[0];
diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h
index d2ac3ff7dc1e..0f71b037702d 100644
--- a/include/crypto/chacha20poly1305.h
+++ b/include/crypto/chacha20poly1305.h
@@ -18,32 +18,33 @@ enum chacha20poly1305_lengths {
void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
- const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+ const u8 key[at_least CHACHA20POLY1305_KEY_SIZE]);
bool __must_check
chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len, const u64 nonce,
- const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+ const u8 key[at_least CHACHA20POLY1305_KEY_SIZE]);
void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
const u8 *ad, const size_t ad_len,
- const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
- const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+ const u8 nonce[at_least XCHACHA20POLY1305_NONCE_SIZE],
+ const u8 key[at_least CHACHA20POLY1305_KEY_SIZE]);
bool __must_check xchacha20poly1305_decrypt(
- u8 *dst, const u8 *src, const size_t src_len, const u8 *ad,
- const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
- const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+ u8 *dst, const u8 *src, const size_t src_len,
+ const u8 *ad, const size_t ad_len,
+ const u8 nonce[at_least XCHACHA20POLY1305_NONCE_SIZE],
+ const u8 key[at_least CHACHA20POLY1305_KEY_SIZE]);
bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
- const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+ const u8 key[at_least CHACHA20POLY1305_KEY_SIZE]);
bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
const u8 *ad, const size_t ad_len,
const u64 nonce,
- const u8 key[CHACHA20POLY1305_KEY_SIZE]);
+ const u8 key[at_least CHACHA20POLY1305_KEY_SIZE]);
bool chacha20poly1305_selftest(void);
diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h
index db63a5577c00..2362b48f8741 100644
--- a/include/crypto/curve25519.h
+++ b/include/crypto/curve25519.h
@@ -13,24 +13,28 @@ enum curve25519_lengths {
CURVE25519_KEY_SIZE = 32
};
-void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
- const u8 scalar[CURVE25519_KEY_SIZE],
- const u8 point[CURVE25519_KEY_SIZE]);
+void curve25519_generic(u8 out[at_least CURVE25519_KEY_SIZE],
+ const u8 scalar[at_least CURVE25519_KEY_SIZE],
+ const u8 point[at_least CURVE25519_KEY_SIZE]);
-bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
- const u8 secret[CURVE25519_KEY_SIZE],
- const u8 basepoint[CURVE25519_KEY_SIZE]);
+bool __must_check
+curve25519(u8 mypublic[at_least CURVE25519_KEY_SIZE],
+ const u8 secret[at_least CURVE25519_KEY_SIZE],
+ const u8 basepoint[at_least CURVE25519_KEY_SIZE]);
-bool __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
- const u8 secret[CURVE25519_KEY_SIZE]);
+bool __must_check
+curve25519_generate_public(u8 pub[at_least CURVE25519_KEY_SIZE],
+ const u8 secret[at_least CURVE25519_KEY_SIZE]);
-static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE])
+static inline void
+curve25519_clamp_secret(u8 secret[at_least CURVE25519_KEY_SIZE])
{
secret[0] &= 248;
secret[31] = (secret[31] & 127) | 64;
}
-static inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE])
+static inline void
+curve25519_generate_secret(u8 secret[at_least CURVE25519_KEY_SIZE])
{
get_random_bytes_wait(secret, CURVE25519_KEY_SIZE);
curve25519_clamp_secret(secret);
diff --git a/include/crypto/internal/blake2b.h b/include/crypto/internal/blake2b.h
deleted file mode 100644
index 3e09e2485306..000000000000
--- a/include/crypto/internal/blake2b.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Helper functions for BLAKE2b implementations.
- * Keep this in sync with the corresponding BLAKE2s header.
- */
-
-#ifndef _CRYPTO_INTERNAL_BLAKE2B_H
-#define _CRYPTO_INTERNAL_BLAKE2B_H
-
-#include <asm/byteorder.h>
-#include <crypto/blake2b.h>
-#include <crypto/internal/hash.h>
-#include <linux/array_size.h>
-#include <linux/compiler.h>
-#include <linux/build_bug.h>
-#include <linux/errno.h>
-#include <linux/math.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-static inline void blake2b_set_lastblock(struct blake2b_state *state)
-{
- state->f[0] = -1;
- state->f[1] = 0;
-}
-
-static inline void blake2b_set_nonlast(struct blake2b_state *state)
-{
- state->f[0] = 0;
- state->f[1] = 0;
-}
-
-typedef void (*blake2b_compress_t)(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc);
-
-/* Helper functions for shash implementations of BLAKE2b */
-
-struct blake2b_tfm_ctx {
- u8 key[BLAKE2B_BLOCK_SIZE];
- unsigned int keylen;
-};
-
-static inline int crypto_blake2b_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen)
-{
- struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm);
-
- if (keylen > BLAKE2B_KEY_SIZE)
- return -EINVAL;
-
- BUILD_BUG_ON(BLAKE2B_KEY_SIZE > BLAKE2B_BLOCK_SIZE);
-
- memcpy(tctx->key, key, keylen);
- memset(tctx->key + keylen, 0, BLAKE2B_BLOCK_SIZE - keylen);
- tctx->keylen = keylen;
-
- return 0;
-}
-
-static inline int crypto_blake2b_init(struct shash_desc *desc)
-{
- const struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
- struct blake2b_state *state = shash_desc_ctx(desc);
- unsigned int outlen = crypto_shash_digestsize(desc->tfm);
-
- __blake2b_init(state, outlen, tctx->keylen);
- return tctx->keylen ?
- crypto_shash_update(desc, tctx->key, BLAKE2B_BLOCK_SIZE) : 0;
-}
-
-static inline int crypto_blake2b_update_bo(struct shash_desc *desc,
- const u8 *in, unsigned int inlen,
- blake2b_compress_t compress)
-{
- struct blake2b_state *state = shash_desc_ctx(desc);
-
- blake2b_set_nonlast(state);
- compress(state, in, inlen / BLAKE2B_BLOCK_SIZE, BLAKE2B_BLOCK_SIZE);
- return inlen - round_down(inlen, BLAKE2B_BLOCK_SIZE);
-}
-
-static inline int crypto_blake2b_finup(struct shash_desc *desc, const u8 *in,
- unsigned int inlen, u8 *out,
- blake2b_compress_t compress)
-{
- struct blake2b_state *state = shash_desc_ctx(desc);
- u8 buf[BLAKE2B_BLOCK_SIZE];
- int i;
-
- memcpy(buf, in, inlen);
- memset(buf + inlen, 0, BLAKE2B_BLOCK_SIZE - inlen);
- blake2b_set_lastblock(state);
- compress(state, buf, 1, inlen);
- for (i = 0; i < ARRAY_SIZE(state->h); i++)
- __cpu_to_le64s(&state->h[i]);
- memcpy(out, state->h, crypto_shash_digestsize(desc->tfm));
- memzero_explicit(buf, sizeof(buf));
- return 0;
-}
-
-#endif /* _CRYPTO_INTERNAL_BLAKE2B_H */
diff --git a/include/crypto/md5.h b/include/crypto/md5.h
index c9aa5c3abc53..c47aedfe67ec 100644
--- a/include/crypto/md5.h
+++ b/include/crypto/md5.h
@@ -76,7 +76,7 @@ void md5_update(struct md5_ctx *ctx, const u8 *data, size_t len);
*
* Context: Any context.
*/
-void md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]);
+void md5_final(struct md5_ctx *ctx, u8 out[at_least MD5_DIGEST_SIZE]);
/**
* md5() - Compute MD5 message digest in one shot
@@ -86,7 +86,7 @@ void md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]);
*
* Context: Any context.
*/
-void md5(const u8 *data, size_t len, u8 out[MD5_DIGEST_SIZE]);
+void md5(const u8 *data, size_t len, u8 out[at_least MD5_DIGEST_SIZE]);
/**
* struct hmac_md5_key - Prepared key for HMAC-MD5
@@ -173,7 +173,7 @@ static inline void hmac_md5_update(struct hmac_md5_ctx *ctx,
*
* Context: Any context.
*/
-void hmac_md5_final(struct hmac_md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]);
+void hmac_md5_final(struct hmac_md5_ctx *ctx, u8 out[at_least MD5_DIGEST_SIZE]);
/**
* hmac_md5() - Compute HMAC-MD5 in one shot, using a prepared key
@@ -187,7 +187,8 @@ void hmac_md5_final(struct hmac_md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]);
* Context: Any context.
*/
void hmac_md5(const struct hmac_md5_key *key,
- const u8 *data, size_t data_len, u8 out[MD5_DIGEST_SIZE]);
+ const u8 *data, size_t data_len,
+ u8 out[at_least MD5_DIGEST_SIZE]);
/**
* hmac_md5_usingrawkey() - Compute HMAC-MD5 in one shot, using a raw key
@@ -204,6 +205,6 @@ void hmac_md5(const struct hmac_md5_key *key,
*/
void hmac_md5_usingrawkey(const u8 *raw_key, size_t raw_key_len,
const u8 *data, size_t data_len,
- u8 out[MD5_DIGEST_SIZE]);
+ u8 out[at_least MD5_DIGEST_SIZE]);
#endif /* _CRYPTO_MD5_H */
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index d4daeec8da19..190beb427c6d 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -59,7 +59,7 @@ struct poly1305_desc_ctx {
};
void poly1305_init(struct poly1305_desc_ctx *desc,
- const u8 key[POLY1305_KEY_SIZE]);
+ const u8 key[at_least POLY1305_KEY_SIZE]);
void poly1305_update(struct poly1305_desc_ctx *desc,
const u8 *src, unsigned int nbytes);
void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest);
diff --git a/include/crypto/polyval.h b/include/crypto/polyval.h
index d2e63743e592..b28b8ef11353 100644
--- a/include/crypto/polyval.h
+++ b/include/crypto/polyval.h
@@ -1,14 +1,190 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * Common values for the Polyval hash algorithm
+ * POLYVAL library API
*
- * Copyright 2021 Google LLC
+ * Copyright 2025 Google LLC
*/
#ifndef _CRYPTO_POLYVAL_H
#define _CRYPTO_POLYVAL_H
+#include <linux/string.h>
+#include <linux/types.h>
+
#define POLYVAL_BLOCK_SIZE 16
#define POLYVAL_DIGEST_SIZE 16
+/**
+ * struct polyval_elem - An element of the POLYVAL finite field
+ * @bytes: View of the element as a byte array (unioned with @lo and @hi)
+ * @lo: The low 64 terms of the element's polynomial
+ * @hi: The high 64 terms of the element's polynomial
+ *
+ * This represents an element of the finite field GF(2^128), using the POLYVAL
+ * convention: little-endian byte order and natural bit order.
+ */
+struct polyval_elem {
+ union {
+ u8 bytes[POLYVAL_BLOCK_SIZE];
+ struct {
+ __le64 lo;
+ __le64 hi;
+ };
+ };
+};
+
+/**
+ * struct polyval_key - Prepared key for POLYVAL
+ *
+ * This may contain just the raw key H, or it may contain precomputed key
+ * powers, depending on the platform's POLYVAL implementation. Use
+ * polyval_preparekey() to initialize this.
+ *
+ * By H^i we mean H^(i-1) * H * x^-128, with base case H^1 = H. I.e. the
+ * exponentiation repeats the POLYVAL dot operation, with its "extra" x^-128.
+ */
+struct polyval_key {
+#ifdef CONFIG_CRYPTO_LIB_POLYVAL_ARCH
+#ifdef CONFIG_ARM64
+ /** @h_powers: Powers of the hash key H^8 through H^1 */
+ struct polyval_elem h_powers[8];
+#elif defined(CONFIG_X86)
+ /** @h_powers: Powers of the hash key H^8 through H^1 */
+ struct polyval_elem h_powers[8];
+#else
+#error "Unhandled arch"
#endif
+#else /* CONFIG_CRYPTO_LIB_POLYVAL_ARCH */
+ /** @h: The hash key H */
+ struct polyval_elem h;
+#endif /* !CONFIG_CRYPTO_LIB_POLYVAL_ARCH */
+};
+
+/**
+ * struct polyval_ctx - Context for computing a POLYVAL value
+ * @key: Pointer to the prepared POLYVAL key. The user of the API is
+ * responsible for ensuring that the key lives as long as the context.
+ * @acc: The accumulator
+ * @partial: Number of data bytes processed so far modulo POLYVAL_BLOCK_SIZE
+ */
+struct polyval_ctx {
+ const struct polyval_key *key;
+ struct polyval_elem acc;
+ size_t partial;
+};
+
+/**
+ * polyval_preparekey() - Prepare a POLYVAL key
+ * @key: (output) The key structure to initialize
+ * @raw_key: The raw hash key
+ *
+ * Initialize a POLYVAL key structure from a raw key. This may be a simple
+ * copy, or it may involve precomputing powers of the key, depending on the
+ * platform's POLYVAL implementation.
+ *
+ * Context: Any context.
+ */
+#ifdef CONFIG_CRYPTO_LIB_POLYVAL_ARCH
+void polyval_preparekey(struct polyval_key *key,
+ const u8 raw_key[POLYVAL_BLOCK_SIZE]);
+
+#else
+static inline void polyval_preparekey(struct polyval_key *key,
+ const u8 raw_key[POLYVAL_BLOCK_SIZE])
+{
+ /* Just a simple copy, so inline it. */
+ memcpy(key->h.bytes, raw_key, POLYVAL_BLOCK_SIZE);
+}
+#endif
+
+/**
+ * polyval_init() - Initialize a POLYVAL context for a new message
+ * @ctx: The context to initialize
+ * @key: The key to use. Note that a pointer to the key is saved in the
+ * context, so the key must live at least as long as the context.
+ */
+static inline void polyval_init(struct polyval_ctx *ctx,
+ const struct polyval_key *key)
+{
+ *ctx = (struct polyval_ctx){ .key = key };
+}
+
+/**
+ * polyval_import_blkaligned() - Import a POLYVAL accumulator value
+ * @ctx: The context to initialize
+ * @key: The key to import. Note that a pointer to the key is saved in the
+ * context, so the key must live at least as long as the context.
+ * @acc: The accumulator value to import.
+ *
+ * This imports an accumulator that was saved by polyval_export_blkaligned().
+ * The same key must be used.
+ */
+static inline void
+polyval_import_blkaligned(struct polyval_ctx *ctx,
+ const struct polyval_key *key,
+ const struct polyval_elem *acc)
+{
+ *ctx = (struct polyval_ctx){ .key = key, .acc = *acc };
+}
+
+/**
+ * polyval_export_blkaligned() - Export a POLYVAL accumulator value
+ * @ctx: The context to export the accumulator value from
+ * @acc: (output) The exported accumulator value
+ *
+ * This exports the accumulator from a POLYVAL context. The number of data
+ * bytes processed so far must be a multiple of POLYVAL_BLOCK_SIZE.
+ */
+static inline void polyval_export_blkaligned(const struct polyval_ctx *ctx,
+ struct polyval_elem *acc)
+{
+ *acc = ctx->acc;
+}
+
+/**
+ * polyval_update() - Update a POLYVAL context with message data
+ * @ctx: The context to update; must have been initialized
+ * @data: The message data
+ * @len: The data length in bytes. Doesn't need to be block-aligned.
+ *
+ * This can be called any number of times.
+ *
+ * Context: Any context.
+ */
+void polyval_update(struct polyval_ctx *ctx, const u8 *data, size_t len);
+
+/**
+ * polyval_final() - Finish computing a POLYVAL value
+ * @ctx: The context to finalize
+ * @out: The output value
+ *
+ * If the total data length isn't a multiple of POLYVAL_BLOCK_SIZE, then the
+ * final block is automatically zero-padded.
+ *
+ * After finishing, this zeroizes @ctx. So the caller does not need to do it.
+ *
+ * Context: Any context.
+ */
+void polyval_final(struct polyval_ctx *ctx, u8 out[POLYVAL_BLOCK_SIZE]);
+
+/**
+ * polyval() - Compute a POLYVAL value
+ * @key: The prepared key
+ * @data: The message data
+ * @len: The data length in bytes. Doesn't need to be block-aligned.
+ * @out: The output value
+ *
+ * Context: Any context.
+ */
+static inline void polyval(const struct polyval_key *key,
+ const u8 *data, size_t len,
+ u8 out[POLYVAL_BLOCK_SIZE])
+{
+ struct polyval_ctx ctx;
+
+ polyval_init(&ctx, key);
+ polyval_update(&ctx, data, len);
+ polyval_final(&ctx, out);
+}
+
+#endif /* _CRYPTO_POLYVAL_H */
diff --git a/include/crypto/sha1.h b/include/crypto/sha1.h
index 162a529ec841..27f08b972931 100644
--- a/include/crypto/sha1.h
+++ b/include/crypto/sha1.h
@@ -84,7 +84,7 @@ void sha1_update(struct sha1_ctx *ctx, const u8 *data, size_t len);
*
* Context: Any context.
*/
-void sha1_final(struct sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE]);
+void sha1_final(struct sha1_ctx *ctx, u8 out[at_least SHA1_DIGEST_SIZE]);
/**
* sha1() - Compute SHA-1 message digest in one shot
@@ -94,7 +94,7 @@ void sha1_final(struct sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE]);
*
* Context: Any context.
*/
-void sha1(const u8 *data, size_t len, u8 out[SHA1_DIGEST_SIZE]);
+void sha1(const u8 *data, size_t len, u8 out[at_least SHA1_DIGEST_SIZE]);
/**
* struct hmac_sha1_key - Prepared key for HMAC-SHA1
@@ -181,7 +181,8 @@ static inline void hmac_sha1_update(struct hmac_sha1_ctx *ctx,
*
* Context: Any context.
*/
-void hmac_sha1_final(struct hmac_sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE]);
+void hmac_sha1_final(struct hmac_sha1_ctx *ctx,
+ u8 out[at_least SHA1_DIGEST_SIZE]);
/**
* hmac_sha1() - Compute HMAC-SHA1 in one shot, using a prepared key
@@ -195,7 +196,8 @@ void hmac_sha1_final(struct hmac_sha1_ctx *ctx, u8 out[SHA1_DIGEST_SIZE]);
* Context: Any context.
*/
void hmac_sha1(const struct hmac_sha1_key *key,
- const u8 *data, size_t data_len, u8 out[SHA1_DIGEST_SIZE]);
+ const u8 *data, size_t data_len,
+ u8 out[at_least SHA1_DIGEST_SIZE]);
/**
* hmac_sha1_usingrawkey() - Compute HMAC-SHA1 in one shot, using a raw key
@@ -212,6 +214,6 @@ void hmac_sha1(const struct hmac_sha1_key *key,
*/
void hmac_sha1_usingrawkey(const u8 *raw_key, size_t raw_key_len,
const u8 *data, size_t data_len,
- u8 out[SHA1_DIGEST_SIZE]);
+ u8 out[at_least SHA1_DIGEST_SIZE]);
#endif /* _CRYPTO_SHA1_H */
diff --git a/include/crypto/sha2.h b/include/crypto/sha2.h
index e5dafb935cc8..7bb8fe169daf 100644
--- a/include/crypto/sha2.h
+++ b/include/crypto/sha2.h
@@ -190,7 +190,7 @@ static inline void sha224_update(struct sha224_ctx *ctx,
*
* Context: Any context.
*/
-void sha224_final(struct sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]);
+void sha224_final(struct sha224_ctx *ctx, u8 out[at_least SHA224_DIGEST_SIZE]);
/**
* sha224() - Compute SHA-224 message digest in one shot
@@ -200,7 +200,7 @@ void sha224_final(struct sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]);
*
* Context: Any context.
*/
-void sha224(const u8 *data, size_t len, u8 out[SHA224_DIGEST_SIZE]);
+void sha224(const u8 *data, size_t len, u8 out[at_least SHA224_DIGEST_SIZE]);
/**
* struct hmac_sha224_key - Prepared key for HMAC-SHA224
@@ -287,7 +287,8 @@ static inline void hmac_sha224_update(struct hmac_sha224_ctx *ctx,
*
* Context: Any context.
*/
-void hmac_sha224_final(struct hmac_sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]);
+void hmac_sha224_final(struct hmac_sha224_ctx *ctx,
+ u8 out[at_least SHA224_DIGEST_SIZE]);
/**
* hmac_sha224() - Compute HMAC-SHA224 in one shot, using a prepared key
@@ -301,7 +302,8 @@ void hmac_sha224_final(struct hmac_sha224_ctx *ctx, u8 out[SHA224_DIGEST_SIZE]);
* Context: Any context.
*/
void hmac_sha224(const struct hmac_sha224_key *key,
- const u8 *data, size_t data_len, u8 out[SHA224_DIGEST_SIZE]);
+ const u8 *data, size_t data_len,
+ u8 out[at_least SHA224_DIGEST_SIZE]);
/**
* hmac_sha224_usingrawkey() - Compute HMAC-SHA224 in one shot, using a raw key
@@ -318,7 +320,7 @@ void hmac_sha224(const struct hmac_sha224_key *key,
*/
void hmac_sha224_usingrawkey(const u8 *raw_key, size_t raw_key_len,
const u8 *data, size_t data_len,
- u8 out[SHA224_DIGEST_SIZE]);
+ u8 out[at_least SHA224_DIGEST_SIZE]);
/**
* struct sha256_ctx - Context for hashing a message with SHA-256
@@ -363,7 +365,7 @@ static inline void sha256_update(struct sha256_ctx *ctx,
*
* Context: Any context.
*/
-void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]);
+void sha256_final(struct sha256_ctx *ctx, u8 out[at_least SHA256_DIGEST_SIZE]);
/**
* sha256() - Compute SHA-256 message digest in one shot
@@ -373,7 +375,7 @@ void sha256_final(struct sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]);
*
* Context: Any context.
*/
-void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]);
+void sha256(const u8 *data, size_t len, u8 out[at_least SHA256_DIGEST_SIZE]);
/**
* sha256_finup_2x() - Compute two SHA-256 digests from a common initial
@@ -390,8 +392,9 @@ void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]);
* Context: Any context.
*/
void sha256_finup_2x(const struct sha256_ctx *ctx, const u8 *data1,
- const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE],
- u8 out2[SHA256_DIGEST_SIZE]);
+ const u8 *data2, size_t len,
+ u8 out1[at_least SHA256_DIGEST_SIZE],
+ u8 out2[at_least SHA256_DIGEST_SIZE]);
/**
* sha256_finup_2x_is_optimized() - Check if sha256_finup_2x() is using a real
@@ -488,7 +491,8 @@ static inline void hmac_sha256_update(struct hmac_sha256_ctx *ctx,
*
* Context: Any context.
*/
-void hmac_sha256_final(struct hmac_sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]);
+void hmac_sha256_final(struct hmac_sha256_ctx *ctx,
+ u8 out[at_least SHA256_DIGEST_SIZE]);
/**
* hmac_sha256() - Compute HMAC-SHA256 in one shot, using a prepared key
@@ -502,7 +506,8 @@ void hmac_sha256_final(struct hmac_sha256_ctx *ctx, u8 out[SHA256_DIGEST_SIZE]);
* Context: Any context.
*/
void hmac_sha256(const struct hmac_sha256_key *key,
- const u8 *data, size_t data_len, u8 out[SHA256_DIGEST_SIZE]);
+ const u8 *data, size_t data_len,
+ u8 out[at_least SHA256_DIGEST_SIZE]);
/**
* hmac_sha256_usingrawkey() - Compute HMAC-SHA256 in one shot, using a raw key
@@ -519,7 +524,7 @@ void hmac_sha256(const struct hmac_sha256_key *key,
*/
void hmac_sha256_usingrawkey(const u8 *raw_key, size_t raw_key_len,
const u8 *data, size_t data_len,
- u8 out[SHA256_DIGEST_SIZE]);
+ u8 out[at_least SHA256_DIGEST_SIZE]);
/* State for the SHA-512 (and SHA-384) compression function */
struct sha512_block_state {
@@ -598,7 +603,7 @@ static inline void sha384_update(struct sha384_ctx *ctx,
*
* Context: Any context.
*/
-void sha384_final(struct sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]);
+void sha384_final(struct sha384_ctx *ctx, u8 out[at_least SHA384_DIGEST_SIZE]);
/**
* sha384() - Compute SHA-384 message digest in one shot
@@ -608,7 +613,7 @@ void sha384_final(struct sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]);
*
* Context: Any context.
*/
-void sha384(const u8 *data, size_t len, u8 out[SHA384_DIGEST_SIZE]);
+void sha384(const u8 *data, size_t len, u8 out[at_least SHA384_DIGEST_SIZE]);
/**
* struct hmac_sha384_key - Prepared key for HMAC-SHA384
@@ -695,7 +700,8 @@ static inline void hmac_sha384_update(struct hmac_sha384_ctx *ctx,
*
* Context: Any context.
*/
-void hmac_sha384_final(struct hmac_sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]);
+void hmac_sha384_final(struct hmac_sha384_ctx *ctx,
+ u8 out[at_least SHA384_DIGEST_SIZE]);
/**
* hmac_sha384() - Compute HMAC-SHA384 in one shot, using a prepared key
@@ -709,7 +715,8 @@ void hmac_sha384_final(struct hmac_sha384_ctx *ctx, u8 out[SHA384_DIGEST_SIZE]);
* Context: Any context.
*/
void hmac_sha384(const struct hmac_sha384_key *key,
- const u8 *data, size_t data_len, u8 out[SHA384_DIGEST_SIZE]);
+ const u8 *data, size_t data_len,
+ u8 out[at_least SHA384_DIGEST_SIZE]);
/**
* hmac_sha384_usingrawkey() - Compute HMAC-SHA384 in one shot, using a raw key
@@ -726,7 +733,7 @@ void hmac_sha384(const struct hmac_sha384_key *key,
*/
void hmac_sha384_usingrawkey(const u8 *raw_key, size_t raw_key_len,
const u8 *data, size_t data_len,
- u8 out[SHA384_DIGEST_SIZE]);
+ u8 out[at_least SHA384_DIGEST_SIZE]);
/**
* struct sha512_ctx - Context for hashing a message with SHA-512
@@ -771,7 +778,7 @@ static inline void sha512_update(struct sha512_ctx *ctx,
*
* Context: Any context.
*/
-void sha512_final(struct sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]);
+void sha512_final(struct sha512_ctx *ctx, u8 out[at_least SHA512_DIGEST_SIZE]);
/**
* sha512() - Compute SHA-512 message digest in one shot
@@ -781,7 +788,7 @@ void sha512_final(struct sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]);
*
* Context: Any context.
*/
-void sha512(const u8 *data, size_t len, u8 out[SHA512_DIGEST_SIZE]);
+void sha512(const u8 *data, size_t len, u8 out[at_least SHA512_DIGEST_SIZE]);
/**
* struct hmac_sha512_key - Prepared key for HMAC-SHA512
@@ -868,7 +875,8 @@ static inline void hmac_sha512_update(struct hmac_sha512_ctx *ctx,
*
* Context: Any context.
*/
-void hmac_sha512_final(struct hmac_sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]);
+void hmac_sha512_final(struct hmac_sha512_ctx *ctx,
+ u8 out[at_least SHA512_DIGEST_SIZE]);
/**
* hmac_sha512() - Compute HMAC-SHA512 in one shot, using a prepared key
@@ -882,7 +890,8 @@ void hmac_sha512_final(struct hmac_sha512_ctx *ctx, u8 out[SHA512_DIGEST_SIZE]);
* Context: Any context.
*/
void hmac_sha512(const struct hmac_sha512_key *key,
- const u8 *data, size_t data_len, u8 out[SHA512_DIGEST_SIZE]);
+ const u8 *data, size_t data_len,
+ u8 out[at_least SHA512_DIGEST_SIZE]);
/**
* hmac_sha512_usingrawkey() - Compute HMAC-SHA512 in one shot, using a raw key
@@ -899,6 +908,6 @@ void hmac_sha512(const struct hmac_sha512_key *key,
*/
void hmac_sha512_usingrawkey(const u8 *raw_key, size_t raw_key_len,
const u8 *data, size_t data_len,
- u8 out[SHA512_DIGEST_SIZE]);
+ u8 out[at_least SHA512_DIGEST_SIZE]);
#endif /* _CRYPTO_SHA2_H */
diff --git a/include/crypto/sha3.h b/include/crypto/sha3.h
index 41e1b83a6d91..c9e4182ff74f 100644
--- a/include/crypto/sha3.h
+++ b/include/crypto/sha3.h
@@ -1,11 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Common values for SHA-3 algorithms
+ *
+ * See also Documentation/crypto/sha3.rst
*/
#ifndef __CRYPTO_SHA3_H__
#define __CRYPTO_SHA3_H__
#include <linux/types.h>
+#include <linux/string.h>
#define SHA3_224_DIGEST_SIZE (224 / 8)
#define SHA3_224_BLOCK_SIZE (200 - 2 * SHA3_224_DIGEST_SIZE)
@@ -23,14 +26,321 @@
#define SHA3_512_BLOCK_SIZE (200 - 2 * SHA3_512_DIGEST_SIZE)
#define SHA3_512_EXPORT_SIZE SHA3_STATE_SIZE + SHA3_512_BLOCK_SIZE + 1
-#define SHA3_STATE_SIZE 200
+/*
+ * SHAKE128 and SHAKE256 actually have variable output size; the DEFAULT_SIZE
+ * values are only used to calculate the block size (rate) as done above.
+ */
+#define SHAKE128_DEFAULT_SIZE (128 / 8)
+#define SHAKE128_BLOCK_SIZE (200 - 2 * SHAKE128_DEFAULT_SIZE)
+#define SHAKE256_DEFAULT_SIZE (256 / 8)
+#define SHAKE256_BLOCK_SIZE (200 - 2 * SHAKE256_DEFAULT_SIZE)
-struct shash_desc;
+#define SHA3_STATE_SIZE 200
+/*
+ * State for the Keccak-f[1600] permutation: 25 64-bit words.
+ *
+ * We usually keep the state words as little-endian, to make absorbing and
+ * squeezing easier. (It means that absorbing and squeezing can just treat the
+ * state as a byte array.) The state words are converted to native-endian only
+ * temporarily by implementations of the permutation that need native-endian
+ * words. Of course, that conversion is a no-op on little-endian machines.
+ */
struct sha3_state {
- u64 st[SHA3_STATE_SIZE / 8];
+ union {
+ __le64 words[SHA3_STATE_SIZE / 8];
+ u8 bytes[SHA3_STATE_SIZE];
+
+ u64 native_words[SHA3_STATE_SIZE / 8]; /* see comment above */
+ };
+};
+
+/* Internal context, shared by the digests (SHA3-*) and the XOFs (SHAKE*) */
+struct __sha3_ctx {
+ struct sha3_state state;
+ u8 digest_size; /* Digests only: the digest size in bytes */
+ u8 block_size; /* Block size in bytes */
+ u8 absorb_offset; /* Index of next state byte to absorb into */
+ u8 squeeze_offset; /* XOFs only: index of next state byte to extract */
+};
+
+void __sha3_update(struct __sha3_ctx *ctx, const u8 *in, size_t in_len);
+
+/**
+ * struct sha3_ctx - Context for SHA3-224, SHA3-256, SHA3-384, or SHA3-512
+ * @ctx: private
+ */
+struct sha3_ctx {
+ struct __sha3_ctx ctx;
};
-int crypto_sha3_init(struct shash_desc *desc);
+/**
+ * sha3_zeroize_ctx() - Zeroize a SHA-3 context
+ * @ctx: The context to zeroize
+ *
+ * This is already called by sha3_final(). Call this explicitly when abandoning
+ * a context without calling sha3_final().
+ */
+static inline void sha3_zeroize_ctx(struct sha3_ctx *ctx)
+{
+ memzero_explicit(ctx, sizeof(*ctx));
+}
+
+/**
+ * struct shake_ctx - Context for SHAKE128 or SHAKE256
+ * @ctx: private
+ */
+struct shake_ctx {
+ struct __sha3_ctx ctx;
+};
+
+/**
+ * shake_zeroize_ctx() - Zeroize a SHAKE context
+ * @ctx: The context to zeroize
+ *
+ * Call this after the last squeeze.
+ */
+static inline void shake_zeroize_ctx(struct shake_ctx *ctx)
+{
+ memzero_explicit(ctx, sizeof(*ctx));
+}
+
+/**
+ * sha3_224_init() - Initialize a context for SHA3-224
+ * @ctx: The context to initialize
+ *
+ * This begins a new SHA3-224 message digest computation.
+ *
+ * Context: Any context.
+ */
+static inline void sha3_224_init(struct sha3_ctx *ctx)
+{
+ *ctx = (struct sha3_ctx){
+ .ctx.digest_size = SHA3_224_DIGEST_SIZE,
+ .ctx.block_size = SHA3_224_BLOCK_SIZE,
+ };
+}
+
+/**
+ * sha3_256_init() - Initialize a context for SHA3-256
+ * @ctx: The context to initialize
+ *
+ * This begins a new SHA3-256 message digest computation.
+ *
+ * Context: Any context.
+ */
+static inline void sha3_256_init(struct sha3_ctx *ctx)
+{
+ *ctx = (struct sha3_ctx){
+ .ctx.digest_size = SHA3_256_DIGEST_SIZE,
+ .ctx.block_size = SHA3_256_BLOCK_SIZE,
+ };
+}
+
+/**
+ * sha3_384_init() - Initialize a context for SHA3-384
+ * @ctx: The context to initialize
+ *
+ * This begins a new SHA3-384 message digest computation.
+ *
+ * Context: Any context.
+ */
+static inline void sha3_384_init(struct sha3_ctx *ctx)
+{
+ *ctx = (struct sha3_ctx){
+ .ctx.digest_size = SHA3_384_DIGEST_SIZE,
+ .ctx.block_size = SHA3_384_BLOCK_SIZE,
+ };
+}
+
+/**
+ * sha3_512_init() - Initialize a context for SHA3-512
+ * @ctx: The context to initialize
+ *
+ * This begins a new SHA3-512 message digest computation.
+ *
+ * Context: Any context.
+ */
+static inline void sha3_512_init(struct sha3_ctx *ctx)
+{
+ *ctx = (struct sha3_ctx){
+ .ctx.digest_size = SHA3_512_DIGEST_SIZE,
+ .ctx.block_size = SHA3_512_BLOCK_SIZE,
+ };
+}
+
+/**
+ * sha3_update() - Update a SHA-3 digest context with input data
+ * @ctx: The context to update; must have been initialized
+ * @in: The input data
+ * @in_len: Length of the input data in bytes
+ *
+ * This can be called any number of times to add data to a SHA3-224, SHA3-256,
+ * SHA3-384, or SHA3-512 digest (depending on which init function was called).
+ *
+ * Context: Any context.
+ */
+static inline void sha3_update(struct sha3_ctx *ctx,
+ const u8 *in, size_t in_len)
+{
+ __sha3_update(&ctx->ctx, in, in_len);
+}
+
+/**
+ * sha3_final() - Finish computing a SHA-3 message digest
+ * @ctx: The context to finalize; must have been initialized
+ * @out: (output) The resulting SHA3-224, SHA3-256, SHA3-384, or SHA3-512
+ * message digest, matching the init function that was called. Note that
+ * the size differs for each one; see SHA3_*_DIGEST_SIZE.
+ *
+ * After finishing, this zeroizes @ctx. So the caller does not need to do it.
+ *
+ * Context: Any context.
+ */
+void sha3_final(struct sha3_ctx *ctx, u8 *out);
+
+/**
+ * shake128_init() - Initialize a context for SHAKE128
+ * @ctx: The context to initialize
+ *
+ * This begins a new SHAKE128 extendable-output function (XOF) computation.
+ *
+ * Context: Any context.
+ */
+static inline void shake128_init(struct shake_ctx *ctx)
+{
+ *ctx = (struct shake_ctx){
+ .ctx.block_size = SHAKE128_BLOCK_SIZE,
+ };
+}
+
+/**
+ * shake256_init() - Initialize a context for SHAKE256
+ * @ctx: The context to initialize
+ *
+ * This begins a new SHAKE256 extendable-output function (XOF) computation.
+ *
+ * Context: Any context.
+ */
+static inline void shake256_init(struct shake_ctx *ctx)
+{
+ *ctx = (struct shake_ctx){
+ .ctx.block_size = SHAKE256_BLOCK_SIZE,
+ };
+}
+
+/**
+ * shake_update() - Update a SHAKE context with input data
+ * @ctx: The context to update; must have been initialized
+ * @in: The input data
+ * @in_len: Length of the input data in bytes
+ *
+ * This can be called any number of times to add more input data to SHAKE128 or
+ * SHAKE256. This cannot be called after squeezing has begun.
+ *
+ * Context: Any context.
+ */
+static inline void shake_update(struct shake_ctx *ctx,
+ const u8 *in, size_t in_len)
+{
+ __sha3_update(&ctx->ctx, in, in_len);
+}
+
+/**
+ * shake_squeeze() - Generate output from SHAKE128 or SHAKE256
+ * @ctx: The context to squeeze; must have been initialized
+ * @out: Where to write the resulting output data
+ * @out_len: The amount of data to extract to @out in bytes
+ *
+ * This may be called multiple times. A number of consecutive squeezes laid
+ * end-to-end will yield the same output as one big squeeze generating the same
+ * total amount of output. More input cannot be provided after squeezing has
+ * begun. After the last squeeze, call shake_zeroize_ctx().
+ *
+ * Context: Any context.
+ */
+void shake_squeeze(struct shake_ctx *ctx, u8 *out, size_t out_len);
+
+/**
+ * sha3_224() - Compute SHA3-224 digest in one shot
+ * @in: The input data to be digested
+ * @in_len: Length of the input data in bytes
+ * @out: The buffer into which the digest will be stored
+ *
+ * Convenience function that computes a SHA3-224 digest. Use this instead of
+ * the incremental API if you're able to provide all the input at once.
+ *
+ * Context: Any context.
+ */
+void sha3_224(const u8 *in, size_t in_len, u8 out[SHA3_224_DIGEST_SIZE]);
+
+/**
+ * sha3_256() - Compute SHA3-256 digest in one shot
+ * @in: The input data to be digested
+ * @in_len: Length of the input data in bytes
+ * @out: The buffer into which the digest will be stored
+ *
+ * Convenience function that computes a SHA3-256 digest. Use this instead of
+ * the incremental API if you're able to provide all the input at once.
+ *
+ * Context: Any context.
+ */
+void sha3_256(const u8 *in, size_t in_len, u8 out[SHA3_256_DIGEST_SIZE]);
+
+/**
+ * sha3_384() - Compute SHA3-384 digest in one shot
+ * @in: The input data to be digested
+ * @in_len: Length of the input data in bytes
+ * @out: The buffer into which the digest will be stored
+ *
+ * Convenience function that computes a SHA3-384 digest. Use this instead of
+ * the incremental API if you're able to provide all the input at once.
+ *
+ * Context: Any context.
+ */
+void sha3_384(const u8 *in, size_t in_len, u8 out[SHA3_384_DIGEST_SIZE]);
+
+/**
+ * sha3_512() - Compute SHA3-512 digest in one shot
+ * @in: The input data to be digested
+ * @in_len: Length of the input data in bytes
+ * @out: The buffer into which the digest will be stored
+ *
+ * Convenience function that computes a SHA3-512 digest. Use this instead of
+ * the incremental API if you're able to provide all the input at once.
+ *
+ * Context: Any context.
+ */
+void sha3_512(const u8 *in, size_t in_len, u8 out[SHA3_512_DIGEST_SIZE]);
+
+/**
+ * shake128() - Compute SHAKE128 in one shot
+ * @in: The input data to be used
+ * @in_len: Length of the input data in bytes
+ * @out: The buffer into which the output will be stored
+ * @out_len: Length of the output to produce in bytes
+ *
+ * Convenience function that computes SHAKE128 in one shot. Use this instead of
+ * the incremental API if you're able to provide all the input at once as well
+ * as receive all the output at once. All output lengths are supported.
+ *
+ * Context: Any context.
+ */
+void shake128(const u8 *in, size_t in_len, u8 *out, size_t out_len);
+
+/**
+ * shake256() - Compute SHAKE256 in one shot
+ * @in: The input data to be used
+ * @in_len: Length of the input data in bytes
+ * @out: The buffer into which the output will be stored
+ * @out_len: Length of the output to produce in bytes
+ *
+ * Convenience function that computes SHAKE256 in one shot. Use this instead of
+ * the incremental API if you're able to provide all the input at once as well
+ * as receive all the output at once. All output lengths are supported.
+ *
+ * Context: Any context.
+ */
+void shake256(const u8 *in, size_t in_len, u8 *out, size_t out_len);
-#endif
+#endif /* __CRYPTO_SHA3_H__ */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 5ff5d99f6ead..fbf0c3a65f59 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -8,6 +8,7 @@
#ifndef _LINUX_ACPI_H
#define _LINUX_ACPI_H
+#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/ioport.h> /* for struct resource */
#include <linux/resource_ext.h>
@@ -221,6 +222,17 @@ void acpi_reserve_initial_tables (void);
void acpi_table_init_complete (void);
int acpi_table_init (void);
+static inline struct acpi_table_header *acpi_get_table_pointer(char *signature, u32 instance)
+{
+ struct acpi_table_header *table;
+ int status = acpi_get_table(signature, instance, &table);
+
+ if (ACPI_FAILURE(status))
+ return ERR_PTR(-ENOENT);
+ return table;
+}
+DEFINE_FREE(acpi_put_table, struct acpi_table_header *, if (!IS_ERR_OR_NULL(_T)) acpi_put_table(_T))
+
int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
int __init_or_acpilib acpi_table_parse_entries(char *id,
unsigned long table_size, int entry_id,
@@ -755,7 +767,6 @@ int acpi_reconfig_notifier_unregister(struct notifier_block *nb);
int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count);
int acpi_gtdt_map_ppi(int type);
bool acpi_gtdt_c3stop(int type);
-int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count);
#endif
#ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER
@@ -1146,12 +1157,7 @@ struct acpi_s2idle_dev_ops {
#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86)
int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg);
-int acpi_get_lps0_constraint(struct acpi_device *adev);
#else /* CONFIG_SUSPEND && CONFIG_X86 */
-static inline int acpi_get_lps0_constraint(struct device *dev)
-{
- return ACPI_STATE_UNKNOWN;
-}
static inline int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg)
{
return -ENODEV;
@@ -1349,9 +1355,6 @@ acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid,
int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname,
void **valptr);
-struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
- struct fwnode_handle *child);
-
struct acpi_probe_entry;
typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *,
struct acpi_probe_entry *);
@@ -1451,13 +1454,6 @@ static inline int acpi_node_prop_get(const struct fwnode_handle *fwnode,
}
static inline struct fwnode_handle *
-acpi_get_next_subnode(const struct fwnode_handle *fwnode,
- struct fwnode_handle *child)
-{
- return NULL;
-}
-
-static inline struct fwnode_handle *
acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
struct fwnode_handle *prev)
{
@@ -1509,12 +1505,19 @@ static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console)
#if IS_ENABLED(CONFIG_ACPI_GENERIC_GSI)
int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res);
+const struct cpumask *acpi_irq_get_affinity(acpi_handle handle,
+ unsigned int index);
#else
static inline
int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res)
{
return -EINVAL;
}
+static inline const struct cpumask *acpi_irq_get_affinity(acpi_handle handle,
+ unsigned int index)
+{
+ return NULL;
+}
#endif
#ifdef CONFIG_ACPI_LPIT
@@ -1541,6 +1544,9 @@ int find_acpi_cpu_topology(unsigned int cpu, int level);
int find_acpi_cpu_topology_cluster(unsigned int cpu);
int find_acpi_cpu_topology_package(unsigned int cpu);
int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
+void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id, cpumask_t *cpus);
+int find_acpi_cache_level_from_id(u32 cache_id);
+int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id, cpumask_t *cpus);
#else
static inline int acpi_pptt_cpu_is_thread(unsigned int cpu)
{
@@ -1562,6 +1568,17 @@ static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
{
return -EINVAL;
}
+static inline void acpi_pptt_get_cpus_from_container(u32 acpi_cpu_id,
+ cpumask_t *cpus) { }
+static inline int find_acpi_cache_level_from_id(u32 cache_id)
+{
+ return -ENOENT;
+}
+static inline int acpi_pptt_get_cpumask_from_cache_id(u32 cache_id,
+ cpumask_t *cpus)
+{
+ return -ENOENT;
+}
#endif
void acpi_arch_init(void);
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index d72d6e5aa200..0c2a8b846c20 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -89,6 +89,21 @@ void remove_cpu_topology(unsigned int cpuid);
void reset_cpu_topology(void);
int parse_acpi_topology(void);
void freq_inv_set_max_ratio(int cpu, u64 max_rate);
-#endif
+
+/*
+ * Architectures like ARM64 don't have a reliable architectural way to get SMT
+ * information and depend on the firmware (ACPI/OF) report. Non-SMT cores won't
+ * initialize thread_id, so we can use this to detect the SMT implementation.
+ */
+static inline bool topology_core_has_smt(int cpu)
+{
+ return cpu_topology[cpu].thread_id != -1;
+}
+
+#else
+
+static inline bool topology_core_has_smt(int cpu) { return false; }
+
+#endif /* CONFIG_GENERIC_ARCH_TOPOLOGY */
#endif /* _LINUX_ARCH_TOPOLOGY_H_ */
diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
new file mode 100644
index 000000000000..7f00c5285a32
--- /dev/null
+++ b/include/linux/arm_mpam.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2025 Arm Ltd. */
+
+#ifndef __LINUX_ARM_MPAM_H
+#define __LINUX_ARM_MPAM_H
+
+#include <linux/acpi.h>
+#include <linux/types.h>
+
+struct mpam_msc;
+
+enum mpam_msc_iface {
+ MPAM_IFACE_MMIO, /* a real MPAM MSC */
+ MPAM_IFACE_PCC, /* a fake MPAM MSC */
+};
+
+enum mpam_class_types {
+ MPAM_CLASS_CACHE, /* Caches, e.g. L2, L3 */
+ MPAM_CLASS_MEMORY, /* Main memory */
+ MPAM_CLASS_UNKNOWN, /* Everything else, e.g. SMMU */
+};
+
+#define MPAM_CLASS_ID_DEFAULT 255
+
+#ifdef CONFIG_ACPI_MPAM
+int acpi_mpam_parse_resources(struct mpam_msc *msc,
+ struct acpi_mpam_msc_node *tbl_msc);
+
+int acpi_mpam_count_msc(void);
+#else
+static inline int acpi_mpam_parse_resources(struct mpam_msc *msc,
+ struct acpi_mpam_msc_node *tbl_msc)
+{
+ return -EINVAL;
+}
+
+static inline int acpi_mpam_count_msc(void) { return -EINVAL; }
+#endif
+
+#ifdef CONFIG_ARM64_MPAM_DRIVER
+int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
+ enum mpam_class_types type, u8 class_id, int component_id);
+#else
+static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
+ enum mpam_class_types type, u8 class_id,
+ int component_id)
+{
+ return -EINVAL;
+}
+#endif
+
+/**
+ * mpam_register_requestor() - Register a requestor with the MPAM driver
+ * @partid_max: The maximum PARTID value the requestor can generate.
+ * @pmg_max: The maximum PMG value the requestor can generate.
+ *
+ * Registers a requestor with the MPAM driver to ensure the chosen system-wide
+ * minimum PARTID and PMG values will allow the requestor's features to be used.
+ *
+ * Returns an error if the registration is too late, and a larger PARTID/PMG
+ * value has been advertised to user-space. In this case the requestor should
+ * not use its MPAM features. Returns 0 on success.
+ */
+int mpam_register_requestor(u16 partid_max, u8 pmg_max);
+
+#endif /* __LINUX_ARM_MPAM_H */
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 595217b7a6e7..b0395e4ccf90 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -45,6 +45,7 @@ struct device;
* bitmap_copy(dst, src, nbits) *dst = *src
* bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2
* bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2
+ * bitmap_weighted_or(dst, src1, src2, nbits) *dst = *src1 | *src2. Returns Hamming Weight of dst
* bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2
* bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2)
* bitmap_complement(dst, src, nbits) *dst = ~(*src)
@@ -165,6 +166,8 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
+unsigned int __bitmap_weighted_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
@@ -338,6 +341,18 @@ void bitmap_or(unsigned long *dst, const unsigned long *src1,
}
static __always_inline
+unsigned int bitmap_weighted_or(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
+{
+ if (small_const_nbits(nbits)) {
+ *dst = *src1 | *src2;
+ return hweight_long(*dst & BITMAP_LAST_WORD_MASK(nbits));
+ } else {
+ return __bitmap_weighted_or(dst, src1, src2, nbits);
+ }
+}
+
+static __always_inline
void bitmap_xor(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
diff --git a/include/linux/bug.h b/include/linux/bug.h
index a9948a9f1093..17a4933c611b 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -42,6 +42,7 @@ void bug_get_file_line(struct bug_entry *bug, const char **file,
struct bug_entry *find_bug(unsigned long bugaddr);
enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs);
+enum bug_trap_type report_bug_entry(struct bug_entry *bug, struct pt_regs *regs);
/* These are defined by the architecture */
int is_valid_bugaddr(unsigned long addr);
@@ -62,6 +63,13 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr,
}
struct bug_entry;
+
+static inline enum bug_trap_type
+report_bug_entry(struct bug_entry *bug, struct pt_regs *regs)
+{
+ return BUG_TRAP_TYPE_BUG;
+}
+
static inline void bug_get_file_line(struct bug_entry *bug, const char **file,
unsigned int *line)
{
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index b3705e8bbe2b..55a44199de87 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -173,6 +173,22 @@ static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
}
}
+static inline void le64_to_cpu_array(u64 *buf, unsigned int words)
+{
+ while (words--) {
+ __le64_to_cpus(buf);
+ buf++;
+ }
+}
+
+static inline void cpu_to_le64_array(u64 *buf, unsigned int words)
+{
+ while (words--) {
+ __cpu_to_le64s(buf);
+ buf++;
+ }
+}
+
static inline void memcpy_from_le32(u32 *dst, const __le32 *src, size_t words)
{
size_t i;
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index 7fcec025c5e0..559353ad64ac 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -74,7 +74,7 @@ enum cc_attr {
CC_ATTR_GUEST_UNROLL_STRING_IO,
/**
- * @CC_ATTR_SEV_SNP: Guest SNP is active.
+ * @CC_ATTR_GUEST_SEV_SNP: Guest SNP is active.
*
* The platform/OS is running as a guest/virtual machine and actively
* using AMD SEV-SNP features.
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index bacefa0f1512..0b55a8f6c59e 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -208,7 +208,7 @@
*/
#define DEFINE_FREE(_name, _type, _free) \
- static inline void __free_##_name(void *p) { _type _T = *(_type *)p; _free; }
+ static __always_inline void __free_##_name(void *p) { _type _T = *(_type *)p; _free; }
#define __free(_name) __cleanup(__free_##_name)
@@ -220,7 +220,7 @@
__val; \
})
-static inline __must_check
+static __always_inline __must_check
const volatile void * __must_check_fn(const volatile void *val)
{ return val; }
@@ -278,16 +278,16 @@ const volatile void * __must_check_fn(const volatile void *val)
#define DEFINE_CLASS(_name, _type, _exit, _init, _init_args...) \
typedef _type class_##_name##_t; \
-static inline void class_##_name##_destructor(_type *p) \
+static __always_inline void class_##_name##_destructor(_type *p) \
{ _type _T = *p; _exit; } \
-static inline _type class_##_name##_constructor(_init_args) \
+static __always_inline _type class_##_name##_constructor(_init_args) \
{ _type t = _init; return t; }
#define EXTEND_CLASS(_name, ext, _init, _init_args...) \
typedef class_##_name##_t class_##_name##ext##_t; \
-static inline void class_##_name##ext##_destructor(class_##_name##_t *p)\
+static __always_inline void class_##_name##ext##_destructor(class_##_name##_t *p) \
{ class_##_name##_destructor(p); } \
-static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
+static __always_inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
{ class_##_name##_t t = _init; return t; }
#define CLASS(_name, var) \
@@ -360,7 +360,7 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
})
#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \
- static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \
+ static __always_inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \
{ \
void *_ptr = (void *)(__force unsigned long)*(_exp); \
if (IS_ERR(_ptr)) { \
@@ -368,7 +368,7 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
} \
return _ptr; \
} \
- static inline int class_##_name##_lock_err(class_##_name##_t *_T) \
+ static __always_inline int class_##_name##_lock_err(class_##_name##_t *_T) \
{ \
long _rc = (__force unsigned long)*(_exp); \
if (!_rc) { \
@@ -397,9 +397,9 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
EXTEND_CLASS(_name, _ext, \
({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \
class_##_name##_t _T) \
- static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
+ static __always_inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
{ return class_##_name##_lock_ptr(_T); } \
- static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \
+ static __always_inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \
{ return class_##_name##_lock_err(_T); }
/*
@@ -479,7 +479,7 @@ typedef struct { \
__VA_ARGS__; \
} class_##_name##_t; \
\
-static inline void class_##_name##_destructor(class_##_name##_t *_T) \
+static __always_inline void class_##_name##_destructor(class_##_name##_t *_T) \
{ \
if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \
} \
@@ -487,7 +487,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \
__DEFINE_GUARD_LOCK_PTR(_name, &_T->lock)
#define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \
-static inline class_##_name##_t class_##_name##_constructor(_type *l) \
+static __always_inline class_##_name##_t class_##_name##_constructor(_type *l) \
{ \
class_##_name##_t _t = { .lock = l }, *_T = &_t; \
_lock; \
@@ -495,7 +495,7 @@ static inline class_##_name##_t class_##_name##_constructor(_type *l) \
}
#define __DEFINE_LOCK_GUARD_0(_name, _lock) \
-static inline class_##_name##_t class_##_name##_constructor(void) \
+static __always_inline class_##_name##_t class_##_name##_constructor(void) \
{ \
class_##_name##_t _t = { .lock = (void*)1 }, \
*_T __maybe_unused = &_t; \
@@ -521,9 +521,9 @@ __DEFINE_LOCK_GUARD_0(_name, _lock)
if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\
_t; }), \
typeof_member(class_##_name##_t, lock) l) \
- static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
+ static __always_inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \
{ return class_##_name##_lock_ptr(_T); } \
- static inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \
+ static __always_inline int class_##_name##_ext##_lock_err(class_##_name##_t *_T) \
{ return class_##_name##_lock_err(_T); }
#define DEFINE_LOCK_GUARD_1_COND_3(_name, _ext, _lock) \
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 0a1b9598940d..3eac51d68426 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -393,6 +393,21 @@ struct ftrace_likely_data {
#define __counted_by_be(member) __counted_by(member)
#endif
+/*
+ * This designates the minimum number of elements a passed array parameter must
+ * have. For example:
+ *
+ * void some_function(u8 param[at_least 7]);
+ *
+ * If a caller passes an array with fewer than 7 elements, the compiler will
+ * emit a warning.
+ */
+#ifndef __CHECKER__
+#define at_least static
+#else
+#define at_least
+#endif
+
/* Do not trap wrapping arithmetic within an annotated function. */
#ifdef CONFIG_UBSAN_INTEGER_WRAP
# define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow")))
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index a9ee4fe55dcf..4073690504a7 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -248,7 +248,8 @@ extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev,
u64 latency_limit_ns);
extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
- struct cpuidle_device *dev);
+ struct cpuidle_device *dev,
+ u64 latency_limit_ns);
extern void cpuidle_use_deepest_state(u64 latency_limit_ns);
#else
static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
@@ -256,7 +257,8 @@ static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
u64 latency_limit_ns)
{return -ENODEV; }
static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev,
+ u64 latency_limit_ns)
{return -ENODEV; }
static inline void cpuidle_use_deepest_state(u64 latency_limit_ns)
{
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index ff8f41ab7ce6..afedfd5bea07 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -126,6 +126,7 @@ extern struct cpumask __cpu_dying_mask;
#define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask)
extern atomic_t __num_online_cpus;
+extern unsigned int __num_possible_cpus;
extern cpumask_t cpus_booted_once_mask;
@@ -729,6 +730,22 @@ void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
}
/**
+ * cpumask_weighted_or - *dstp = *src1p | *src2p and return the weight of the result
+ * @dstp: the cpumask result
+ * @src1p: the first input
+ * @src2p: the second input
+ *
+ * Return: The number of bits set in the resulting cpumask @dstp
+ */
+static __always_inline
+unsigned int cpumask_weighted_or(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
+{
+ return bitmap_weighted_or(cpumask_bits(dstp), cpumask_bits(src1p),
+ cpumask_bits(src2p), small_cpumask_bits);
+}
+
+/**
* cpumask_xor - *dstp = *src1p ^ *src2p
* @dstp: the cpumask result
* @src1p: the first input
@@ -1005,6 +1022,7 @@ static __always_inline unsigned int cpumask_size(void)
#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
#define __cpumask_var_read_mostly __read_mostly
+#define CPUMASK_VAR_NULL NULL
bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
@@ -1051,6 +1069,7 @@ static __always_inline bool cpumask_available(cpumask_var_t mask)
#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
#define __cpumask_var_read_mostly
+#define CPUMASK_VAR_NULL {}
static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
@@ -1136,13 +1155,13 @@ void init_cpu_possible(const struct cpumask *src);
#define __assign_cpu(cpu, mask, val) \
__assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val))
-#define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible))
#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled))
#define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present))
#define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active))
#define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying))
void set_cpu_online(unsigned int cpu, bool online);
+void set_cpu_possible(unsigned int cpu, bool possible);
/**
* to_cpumask - convert a NR_CPUS bitmap to a struct cpumask *
@@ -1195,7 +1214,12 @@ static __always_inline unsigned int num_online_cpus(void)
{
return raw_atomic_read(&__num_online_cpus);
}
-#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
+
+static __always_inline unsigned int num_possible_cpus(void)
+{
+ return __num_possible_cpus;
+}
+
#define num_enabled_cpus() cpumask_weight(cpu_enabled_mask)
#define num_present_cpus() cpumask_weight(cpu_present_mask)
#define num_active_cpus() cpumask_weight(cpu_active_mask)
diff --git a/include/linux/delay.h b/include/linux/delay.h
index 89866bab100d..46412c00033a 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -68,7 +68,7 @@ void usleep_range_state(unsigned long min, unsigned long max,
* @min: Minimum time in microseconds to sleep
* @max: Maximum time in microseconds to sleep
*
- * For basic information please refere to usleep_range_state().
+ * For basic information please refer to usleep_range_state().
*
* The task will be in the state TASK_UNINTERRUPTIBLE during the sleep.
*/
@@ -82,10 +82,10 @@ static inline void usleep_range(unsigned long min, unsigned long max)
* @min: Minimum time in microseconds to sleep
* @max: Maximum time in microseconds to sleep
*
- * For basic information please refere to usleep_range_state().
+ * For basic information please refer to usleep_range_state().
*
* The sleeping task has the state TASK_IDLE during the sleep to prevent
- * contribution to the load avarage.
+ * contribution to the load average.
*/
static inline void usleep_range_idle(unsigned long min, unsigned long max)
{
@@ -96,7 +96,7 @@ static inline void usleep_range_idle(unsigned long min, unsigned long max)
* ssleep - wrapper for seconds around msleep
* @seconds: Requested sleep duration in seconds
*
- * Please refere to msleep() for detailed information.
+ * Please refer to msleep() for detailed information.
*/
static inline void ssleep(unsigned int seconds)
{
diff --git a/include/linux/devfreq-governor.h b/include/linux/devfreq-governor.h
new file mode 100644
index 000000000000..dfdd0160a29f
--- /dev/null
+++ b/include/linux/devfreq-governor.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * devfreq-governor.h - internal header for devfreq governors.
+ *
+ * Copyright (C) 2011 Samsung Electronics
+ * MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This header is for devfreq governors
+ */
+
+#ifndef __LINUX_DEVFREQ_DEVFREQ_H__
+#define __LINUX_DEVFREQ_DEVFREQ_H__
+
+#include <linux/devfreq.h>
+
+#define DEVFREQ_NAME_LEN 16
+
+#define to_devfreq(DEV) container_of((DEV), struct devfreq, dev)
+
+/* Devfreq events */
+#define DEVFREQ_GOV_START 0x1
+#define DEVFREQ_GOV_STOP 0x2
+#define DEVFREQ_GOV_UPDATE_INTERVAL 0x3
+#define DEVFREQ_GOV_SUSPEND 0x4
+#define DEVFREQ_GOV_RESUME 0x5
+
+#define DEVFREQ_MIN_FREQ 0
+#define DEVFREQ_MAX_FREQ ULONG_MAX
+
+/*
+ * Definition of the governor feature flags
+ * - DEVFREQ_GOV_FLAG_IMMUTABLE
+ * : This governor is never changeable to other governors.
+ * - DEVFREQ_GOV_FLAG_IRQ_DRIVEN
+ * : The devfreq won't schedule the work for this governor.
+ */
+#define DEVFREQ_GOV_FLAG_IMMUTABLE BIT(0)
+#define DEVFREQ_GOV_FLAG_IRQ_DRIVEN BIT(1)
+
+/*
+ * Definition of governor attribute flags except for common sysfs attributes
+ * - DEVFREQ_GOV_ATTR_POLLING_INTERVAL
+ * : Indicate polling_interval sysfs attribute
+ * - DEVFREQ_GOV_ATTR_TIMER
+ * : Indicate timer sysfs attribute
+ */
+#define DEVFREQ_GOV_ATTR_POLLING_INTERVAL BIT(0)
+#define DEVFREQ_GOV_ATTR_TIMER BIT(1)
+
+/**
+ * struct devfreq_governor - Devfreq policy governor
+ * @node: list node - contains registered devfreq governors
+ * @name: Governor's name
+ * @attrs: Governor's sysfs attribute flags
+ * @flags: Governor's feature flags
+ * @get_target_freq: Returns desired operating frequency for the device.
+ * Basically, get_target_freq will run
+ * devfreq_dev_profile.get_dev_status() to get the
+ * status of the device (load = busy_time / total_time).
+ * @event_handler: Callback for devfreq core framework to notify events
+ * to governors. Events include per device governor
+ * init and exit, opp changes out of devfreq, suspend
+ * and resume of per device devfreq during device idle.
+ *
+ * Note that the callbacks are called with devfreq->lock locked by devfreq.
+ */
+struct devfreq_governor {
+ struct list_head node;
+
+ const char name[DEVFREQ_NAME_LEN];
+ const u64 attrs;
+ const u64 flags;
+ int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+ int (*event_handler)(struct devfreq *devfreq,
+ unsigned int event, void *data);
+};
+
+void devfreq_monitor_start(struct devfreq *devfreq);
+void devfreq_monitor_stop(struct devfreq *devfreq);
+void devfreq_monitor_suspend(struct devfreq *devfreq);
+void devfreq_monitor_resume(struct devfreq *devfreq);
+void devfreq_update_interval(struct devfreq *devfreq, unsigned int *delay);
+
+int devfreq_add_governor(struct devfreq_governor *governor);
+int devfreq_remove_governor(struct devfreq_governor *governor);
+
+int devm_devfreq_add_governor(struct device *dev,
+ struct devfreq_governor *governor);
+
+int devfreq_update_status(struct devfreq *devfreq, unsigned long freq);
+int devfreq_update_target(struct devfreq *devfreq, unsigned long freq);
+void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq,
+ unsigned long *max_freq);
+
+static inline int devfreq_update_stats(struct devfreq *df)
+{
+ if (!df->profile->get_dev_status)
+ return -EINVAL;
+
+ return df->profile->get_dev_status(df->dev.parent, &df->last_status);
+}
+#endif /* __LINUX_DEVFREQ_DEVFREQ_H__ */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index a98cc39e7aaa..b23ff8b83219 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1126,6 +1126,8 @@ static inline bool efi_runtime_disabled(void) { return true; }
extern void efi_call_virt_check_flags(unsigned long flags, const void *caller);
extern unsigned long efi_call_virt_save_flags(void);
+void efi_runtime_assert_lock_held(void);
+
enum efi_secureboot_mode {
efi_secureboot_mode_unset,
efi_secureboot_mode_unknown,
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 61d50571ad88..43aa6153dc57 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -54,6 +54,8 @@ struct em_perf_table {
/**
* struct em_perf_domain - Performance domain
* @em_table: Pointer to the runtime modifiable em_perf_table
+ * @node: node in em_pd_list (in energy_model.c)
+ * @id: A unique ID number for each performance domain
* @nr_perf_states: Number of performance states
* @min_perf_state: Minimum allowed Performance State index
* @max_perf_state: Maximum allowed Performance State index
@@ -71,6 +73,8 @@ struct em_perf_table {
*/
struct em_perf_domain {
struct em_perf_table __rcu *em_table;
+ struct list_head node;
+ int id;
int nr_perf_states;
int min_perf_state;
int max_perf_state;
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 7177436f0f9e..87efb38b7081 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -3,11 +3,11 @@
#define __LINUX_ENTRYCOMMON_H
#include <linux/irq-entry-common.h>
+#include <linux/livepatch.h>
#include <linux/ptrace.h>
+#include <linux/resume_user_mode.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
-#include <linux/livepatch.h>
-#include <linux/resume_user_mode.h>
#include <asm/entry-common.h>
#include <asm/syscall.h>
@@ -37,6 +37,7 @@
SYSCALL_WORK_SYSCALL_AUDIT | \
SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
ARCH_SYSCALL_WORK_ENTER)
+
#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \
SYSCALL_WORK_SYSCALL_TRACE | \
SYSCALL_WORK_SYSCALL_AUDIT | \
@@ -44,25 +45,7 @@
SYSCALL_WORK_SYSCALL_EXIT_TRAP | \
ARCH_SYSCALL_WORK_EXIT)
-/**
- * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
- * @regs: Pointer to currents pt_regs
- *
- * Invoked from architecture specific syscall entry code with interrupts
- * disabled. The calling code has to be non-instrumentable. When the
- * function returns all state is correct, interrupts are enabled and the
- * subsequent functions can be instrumented.
- *
- * This handles lockdep, RCU (context tracking) and tracing state, i.e.
- * the functionality provided by enter_from_user_mode().
- *
- * This is invoked when there is extra architecture specific functionality
- * to be done between establishing state and handling user mode entry work.
- */
-void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
-
-long syscall_trace_enter(struct pt_regs *regs, long syscall,
- unsigned long work);
+long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
/**
* syscall_enter_from_user_mode_work - Check and handle work before invoking
@@ -71,8 +54,8 @@ long syscall_trace_enter(struct pt_regs *regs, long syscall,
* @syscall: The syscall number
*
* Invoked from architecture specific syscall entry code with interrupts
- * enabled after invoking syscall_enter_from_user_mode_prepare() and extra
- * architecture specific work.
+ * enabled after invoking enter_from_user_mode(), enabling interrupts and
+ * extra architecture specific work.
*
* Returns: The original or a modified syscall number
*
@@ -108,8 +91,9 @@ static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *re
* function returns all state is correct, interrupts are enabled and the
* subsequent functions can be instrumented.
*
- * This is combination of syscall_enter_from_user_mode_prepare() and
- * syscall_enter_from_user_mode_work().
+ * This is the combination of enter_from_user_mode() and
+ * syscall_enter_from_user_mode_work() to be used when there is no
+ * architecture specific work to be done between the two.
*
* Returns: The original or a modified syscall number. See
* syscall_enter_from_user_mode_work() for further explanation.
@@ -162,7 +146,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
local_irq_enable();
}
- rseq_syscall(regs);
+ rseq_debug_syscall_return(regs);
/*
* Do one-time syscall specific work. If these work items are
@@ -172,7 +156,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
if (unlikely(work & SYSCALL_WORK_EXIT))
syscall_exit_work(regs, work);
local_irq_disable_exit_to_user();
- exit_to_user_mode_prepare(regs);
+ syscall_exit_to_user_mode_prepare(regs);
}
/**
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 32884c9721e5..0a8c6c4d1a82 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -22,14 +22,18 @@ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */
extern unsigned int freeze_timeout_msecs;
/*
- * Check if a process has been frozen
+ * Check if a process has been frozen by the PM or cgroup1 freezer. Note that
+ * cgroup2 freezer uses the job control mechanism and does not interact with
+ * the PM freezer.
*/
extern bool frozen(struct task_struct *p);
extern bool freezing_slow_path(struct task_struct *p);
/*
- * Check if there is a request to freeze a process
+ * Check if there is a request to freeze a task from the PM or cgroup1 freezer.
+ * Note that cgroup2 freezer uses the job control mechanism and does not
+ * interact with the PM freezer.
*/
static inline bool freezing(struct task_struct *p)
{
@@ -63,9 +67,9 @@ extern bool freeze_task(struct task_struct *p);
extern bool set_freezable(void);
#ifdef CONFIG_CGROUP_FREEZER
-extern bool cgroup_freezing(struct task_struct *task);
+extern bool cgroup1_freezing(struct task_struct *task);
#else /* !CONFIG_CGROUP_FREEZER */
-static inline bool cgroup_freezing(struct task_struct *task)
+static inline bool cgroup1_freezing(struct task_struct *task)
{
return false;
}
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 71ac78b9f834..11cab07f322a 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,7 +11,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
-void huge_pmd_set_accessed(struct vm_fault *vmf);
+bool huge_pmd_set_accessed(struct vm_fault *vmf);
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
struct vm_area_struct *vma);
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index c0397423d3a8..e9ade2ff4af6 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -152,7 +152,7 @@ struct rapl_if_priv {
union rapl_reg reg_unit;
union rapl_reg regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
int limits[RAPL_DOMAIN_MAX];
- int (*read_raw)(int id, struct reg_action *ra);
+ int (*read_raw)(int id, struct reg_action *ra, bool atomic);
int (*write_raw)(int id, struct reg_action *ra);
void *defaults;
void *rpi;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 51b6484c0493..266f2b39213a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -109,6 +109,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
* @name: name of the device
* @dev_id: cookie to identify the device
* @percpu_dev_id: cookie to identify the device
+ * @affinity: CPUs this irqaction is allowed to run on
* @next: pointer to the next irqaction for shared interrupts
* @irq: interrupt number
* @flags: flags (see IRQF_* above)
@@ -121,8 +122,11 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
*/
struct irqaction {
irq_handler_t handler;
- void *dev_id;
- void __percpu *percpu_dev_id;
+ union {
+ void *dev_id;
+ void __percpu *percpu_dev_id;
+ };
+ const struct cpumask *affinity;
struct irqaction *next;
irq_handler_t thread_fn;
struct task_struct *thread;
@@ -179,7 +183,7 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler,
extern int __must_check
__request_percpu_irq(unsigned int irq, irq_handler_t handler,
unsigned long flags, const char *devname,
- void __percpu *percpu_dev_id);
+ const cpumask_t *affinity, void __percpu *percpu_dev_id);
extern int __must_check
request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags,
@@ -190,12 +194,21 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler,
const char *devname, void __percpu *percpu_dev_id)
{
return __request_percpu_irq(irq, handler, 0,
- devname, percpu_dev_id);
+ devname, NULL, percpu_dev_id);
+}
+
+static inline int __must_check
+request_percpu_irq_affinity(unsigned int irq, irq_handler_t handler,
+ const char *devname, const cpumask_t *affinity,
+ void __percpu *percpu_dev_id)
+{
+ return __request_percpu_irq(irq, handler, 0,
+ devname, affinity, percpu_dev_id);
}
extern int __must_check
-request_percpu_nmi(unsigned int irq, irq_handler_t handler,
- const char *devname, void __percpu *dev);
+request_percpu_nmi(unsigned int irq, irq_handler_t handler, const char *name,
+ const struct cpumask *affinity, void __percpu *dev_id);
extern const void *free_irq(unsigned int, void *);
extern void free_percpu_irq(unsigned int, void __percpu *);
diff --git a/include/linux/irq-entry-common.h b/include/linux/irq-entry-common.h
index ba1ed42f8a1c..6ab913e57da0 100644
--- a/include/linux/irq-entry-common.h
+++ b/include/linux/irq-entry-common.h
@@ -2,11 +2,12 @@
#ifndef __LINUX_IRQENTRYCOMMON_H
#define __LINUX_IRQENTRYCOMMON_H
+#include <linux/context_tracking.h>
+#include <linux/kmsan.h>
+#include <linux/rseq_entry.h>
#include <linux/static_call_types.h>
#include <linux/syscalls.h>
-#include <linux/context_tracking.h>
#include <linux/tick.h>
-#include <linux/kmsan.h>
#include <linux/unwind_deferred.h>
#include <asm/entry-common.h>
@@ -29,7 +30,7 @@
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
- _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
+ _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
@@ -67,6 +68,7 @@ static __always_inline bool arch_in_rcu_eqs(void) { return false; }
/**
* enter_from_user_mode - Establish state when coming from user mode
+ * @regs: Pointer to current's pt_regs
*
* Syscall/interrupt entry disables interrupts, but user mode is traced as
* interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
@@ -195,14 +197,11 @@ static __always_inline void arch_exit_to_user_mode(void) { }
*/
void arch_do_signal_or_restart(struct pt_regs *regs);
-/**
- * exit_to_user_mode_loop - do any pending work before leaving to user space
- */
-unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
- unsigned long ti_work);
+/* Handle pending TIF work */
+unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work);
/**
- * exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
+ * __exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
* @regs: Pointer to pt_regs on entry stack
*
* 1) check that interrupts are disabled
@@ -210,8 +209,10 @@ unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
* 3) call exit_to_user_mode_loop() if any flags from
* EXIT_TO_USER_MODE_WORK are set
* 4) check that interrupts are still disabled
+ *
+ * Don't invoke directly, use the syscall/irqentry_ prefixed variants below
*/
-static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
+static __always_inline void __exit_to_user_mode_prepare(struct pt_regs *regs)
{
unsigned long ti_work;
@@ -225,13 +226,52 @@ static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
ti_work = exit_to_user_mode_loop(regs, ti_work);
arch_exit_to_user_mode_prepare(regs, ti_work);
+}
+static __always_inline void __exit_to_user_mode_validate(void)
+{
/* Ensure that kernel state is sane for a return to userspace */
kmap_assert_nomap();
lockdep_assert_irqs_disabled();
lockdep_sys_exit();
}
+/* Temporary workaround to keep ARM64 alive */
+static __always_inline void exit_to_user_mode_prepare_legacy(struct pt_regs *regs)
+{
+ __exit_to_user_mode_prepare(regs);
+ rseq_exit_to_user_mode_legacy();
+ __exit_to_user_mode_validate();
+}
+
+/**
+ * syscall_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
+ * @regs: Pointer to pt_regs on entry stack
+ *
+ * Wrapper around __exit_to_user_mode_prepare() to separate the exit work for
+ * syscalls and interrupts.
+ */
+static __always_inline void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ __exit_to_user_mode_prepare(regs);
+ rseq_syscall_exit_to_user_mode();
+ __exit_to_user_mode_validate();
+}
+
+/**
+ * irqentry_exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
+ * @regs: Pointer to pt_regs on entry stack
+ *
+ * Wrapper around __exit_to_user_mode_prepare() to separate the exit work for
+ * syscalls and interrupts.
+ */
+static __always_inline void irqentry_exit_to_user_mode_prepare(struct pt_regs *regs)
+{
+ __exit_to_user_mode_prepare(regs);
+ rseq_irqentry_exit_to_user_mode();
+ __exit_to_user_mode_validate();
+}
+
/**
* exit_to_user_mode - Fixup state when exiting to user mode
*
@@ -274,7 +314,11 @@ static __always_inline void exit_to_user_mode(void)
*
* The function establishes state (lockdep, RCU (context tracking), tracing)
*/
-void irqentry_enter_from_user_mode(struct pt_regs *regs);
+static __always_inline void irqentry_enter_from_user_mode(struct pt_regs *regs)
+{
+ enter_from_user_mode(regs);
+ rseq_note_user_irq_entry();
+}
/**
* irqentry_exit_to_user_mode - Interrupt exit work
@@ -289,7 +333,13 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs);
* Interrupt exit is not invoking #1 which is the syscall specific one time
* work.
*/
-void irqentry_exit_to_user_mode(struct pt_regs *regs);
+static __always_inline void irqentry_exit_to_user_mode(struct pt_regs *regs)
+{
+ instrumentation_begin();
+ irqentry_exit_to_user_mode_prepare(regs);
+ instrumentation_end();
+ exit_to_user_mode();
+}
#ifndef irqentry_state
/**
@@ -354,6 +404,7 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
* Conditional reschedule with additional sanity checks.
*/
void raw_irqentry_exit_cond_resched(void);
+
#ifdef CONFIG_PREEMPT_DYNAMIC
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c67e76fbcc07..4a9f1d7b08c3 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -655,7 +655,6 @@ extern void handle_bad_irq(struct irq_desc *desc);
extern void handle_nested_irq(unsigned int irq);
extern void handle_fasteoi_nmi(struct irq_desc *desc);
-extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc);
extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg);
extern int irq_chip_pm_get(struct irq_data *data);
@@ -719,10 +718,6 @@ static inline void irq_set_chip_and_handler(unsigned int irq,
}
extern int irq_set_percpu_devid(unsigned int irq);
-extern int irq_set_percpu_devid_partition(unsigned int irq,
- const struct cpumask *affinity);
-extern int irq_get_percpu_devid_partition(unsigned int irq,
- struct cpumask *affinity);
extern void
__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 136f2980cba3..c5afd053ae32 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -2,8 +2,9 @@
#ifndef _LINUX_IRQ_WORK_H
#define _LINUX_IRQ_WORK_H
-#include <linux/smp_types.h>
+#include <linux/irq_work_types.h>
#include <linux/rcuwait.h>
+#include <linux/smp_types.h>
/*
* An entry can be in one of four states:
@@ -14,12 +15,6 @@
* busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
*/
-struct irq_work {
- struct __call_single_node node;
- void (*func)(struct irq_work *);
- struct rcuwait irqwait;
-};
-
#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \
.node = { .u_flags = (_flags), }, \
.func = (_func), \
diff --git a/include/linux/irq_work_types.h b/include/linux/irq_work_types.h
new file mode 100644
index 000000000000..73abec5bb06e
--- /dev/null
+++ b/include/linux/irq_work_types.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IRQ_WORK_TYPES_H
+#define _LINUX_IRQ_WORK_TYPES_H
+
+#include <linux/smp_types.h>
+#include <linux/types.h>
+
+struct irq_work {
+ struct __call_single_node node;
+ void (*func)(struct irq_work *);
+ struct rcuwait irqwait;
+};
+
+#endif
diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h
index d5e6024cb2a8..bc4ddacd6ddc 100644
--- a/include/linux/irqchip.h
+++ b/include/linux/irqchip.h
@@ -17,12 +17,18 @@
#include <linux/of_irq.h>
#include <linux/platform_device.h>
+typedef int (*platform_irq_probe_t)(struct platform_device *, struct device_node *);
+
/* Undefined on purpose */
extern of_irq_init_cb_t typecheck_irq_init_cb;
+extern platform_irq_probe_t typecheck_irq_probe;
#define typecheck_irq_init_cb(fn) \
(__typecheck(typecheck_irq_init_cb, &fn) ? fn : fn)
+#define typecheck_irq_probe(fn) \
+ (__typecheck(typecheck_irq_probe, &fn) ? fn : fn)
+
/*
* This macro must be used by the different irqchip drivers to declare
* the association between their DT compatible string and their
@@ -42,7 +48,7 @@ extern int platform_irqchip_probe(struct platform_device *pdev);
static const struct of_device_id drv_name##_irqchip_match_table[] = {
#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, \
- .data = typecheck_irq_init_cb(fn), },
+ .data = typecheck_irq_probe(fn), },
#define IRQCHIP_PLATFORM_DRIVER_END(drv_name, ...) \
diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h
deleted file mode 100644
index b35ee22c278f..000000000000
--- a/include/linux/irqchip/irq-partition-percpu.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2016 ARM Limited, All Rights Reserved.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- */
-
-#ifndef __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H
-#define __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H
-
-#include <linux/fwnode.h>
-#include <linux/cpumask_types.h>
-#include <linux/irqdomain.h>
-
-struct partition_affinity {
- cpumask_t mask;
- void *partition_id;
-};
-
-struct partition_desc;
-
-#ifdef CONFIG_PARTITION_PERCPU
-int partition_translate_id(struct partition_desc *desc, void *partition_id);
-struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode,
- struct partition_affinity *parts,
- int nr_parts,
- int chained_irq,
- const struct irq_domain_ops *ops);
-struct irq_domain *partition_get_domain(struct partition_desc *dsc);
-#else
-static inline int partition_translate_id(struct partition_desc *desc,
- void *partition_id)
-{
- return -EINVAL;
-}
-
-static inline
-struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode,
- struct partition_affinity *parts,
- int nr_parts,
- int chained_irq,
- const struct irq_domain_ops *ops)
-{
- return NULL;
-}
-
-static inline
-struct irq_domain *partition_get_domain(struct partition_desc *dsc)
-{
- return NULL;
-}
-#endif
-
-#endif /* __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H */
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index fd091c35d572..37e0b5b5600a 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -82,7 +82,6 @@ struct irq_desc {
int threads_handled_last;
raw_spinlock_t lock;
struct cpumask *percpu_enabled;
- const struct cpumask *percpu_affinity;
#ifdef CONFIG_SMP
const struct cpumask *affinity_hint;
struct irq_affinity_notify *affinity_notify;
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 4a86e6b915dd..952d3c8dd6b7 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -44,6 +44,23 @@ struct irq_fwspec {
u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS];
};
+/**
+ * struct irq_fwspec_info - firmware provided IRQ information structure
+ *
+ * @flags: Information validity flags
+ * @affinity: Affinity mask for this interrupt
+ *
+ * This structure reports firmware-specific information about an
+ * interrupt. The only significant information is the affinity of a
+ * per-CPU interrupt, but this is designed to be extended as required.
+ */
+struct irq_fwspec_info {
+ unsigned long flags;
+ const struct cpumask *affinity;
+};
+
+#define IRQ_FWSPEC_INFO_AFFINITY_VALID BIT(0)
+
/* Conversion function from of_phandle_args fields to fwspec */
void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
unsigned int count, struct irq_fwspec *fwspec);
@@ -69,6 +86,9 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
* @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and
* linux irq type value (@out_type). This is a generalised @xlate
* (over struct irq_fwspec) and is preferred if provided.
+ * @get_fwspec_info:
+ * Given @fwspec, report additional firmware-provided information in
+ * @info. Optional.
* @debug_show: For domains to show specific data for an interrupt in debugfs.
*
* Functions below are provided by the driver and called whenever a new mapping
@@ -96,6 +116,7 @@ struct irq_domain_ops {
void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
unsigned long *out_hwirq, unsigned int *out_type);
+ int (*get_fwspec_info)(struct irq_fwspec *fwspec, struct irq_fwspec_info *info);
#endif
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
void (*debug_show)(struct seq_file *m, struct irq_domain *d,
@@ -602,6 +623,8 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_bas
int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq);
+int irq_populate_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info);
+
static inline bool irq_domain_is_hierarchy(struct irq_domain *domain)
{
return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY;
@@ -685,6 +708,10 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain)
return false;
}
+static inline int irq_populate_fwspec_info(struct irq_fwspec *fwspec, struct irq_fwspec_info *info)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
#ifdef CONFIG_GENERIC_MSI_IRQ
@@ -703,12 +730,6 @@ static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsig
}
#endif
-/* Deprecated functions. Will be removed in the merge window */
-static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node)
-{
- return node ? &node->fwnode : NULL;
-}
-
static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
const struct irq_domain_ops *ops,
void *host_data)
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 490464c205b4..a568d8e6f4e8 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -11,8 +11,22 @@
#ifdef KVM_SUB_MODULES
#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol) \
EXPORT_SYMBOL_FOR_MODULES(symbol, __stringify(KVM_SUB_MODULES))
+#define EXPORT_SYMBOL_FOR_KVM(symbol) \
+ EXPORT_SYMBOL_FOR_MODULES(symbol, "kvm," __stringify(KVM_SUB_MODULES))
#else
#define EXPORT_SYMBOL_FOR_KVM_INTERNAL(symbol)
+/*
+ * Allow architectures to provide a custom EXPORT_SYMBOL_FOR_KVM, but only
+ * if there are no submodules, e.g. to allow suppressing exports when KVM=m but
+ * kvm.ko won't actually be built (due to lack of at least one submodule).
+ */
+#ifndef EXPORT_SYMBOL_FOR_KVM
+#if IS_MODULE(CONFIG_KVM)
+#define EXPORT_SYMBOL_FOR_KVM(symbol) EXPORT_SYMBOL_FOR_MODULES(symbol, "kvm")
+#else
+#define EXPORT_SYMBOL_FOR_KVM(symbol)
+#endif /* IS_MODULE(CONFIG_KVM) */
+#endif /* EXPORT_SYMBOL_FOR_KVM */
#endif
#ifndef __ASSEMBLER__
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 67964dc4db95..dd634103b014 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -616,7 +616,7 @@ do { \
#define lockdep_assert_in_softirq() \
do { \
WARN_ON_ONCE(__lockdep_enabled && \
- (!in_softirq() || in_irq() || in_nmi())); \
+ (!in_softirq() || in_hardirq() || in_nmi())); \
} while (0)
extern void lockdep_assert_in_softirq_func(void);
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 0c214256216f..ba1515160894 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -96,17 +96,8 @@ int set_memory_block_size_order(unsigned int order);
#define MEM_GOING_ONLINE (1<<3)
#define MEM_CANCEL_ONLINE (1<<4)
#define MEM_CANCEL_OFFLINE (1<<5)
-#define MEM_PREPARE_ONLINE (1<<6)
-#define MEM_FINISH_OFFLINE (1<<7)
struct memory_notify {
- /*
- * The altmap_start_pfn and altmap_nr_pages fields are designated for
- * specifying the altmap range and are exclusively intended for use in
- * MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
- */
- unsigned long altmap_start_pfn;
- unsigned long altmap_nr_pages;
unsigned long start_pfn;
unsigned long nr_pages;
};
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 23f038a16231..f2f16cdd73ee 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -58,22 +58,6 @@ typedef int __bitwise mhp_t;
* implies the node id (nid).
*/
#define MHP_NID_IS_MGID ((__force mhp_t)BIT(2))
-/*
- * The hotplugged memory is completely inaccessible while the memory is
- * offline. The memory provider will handle MEM_PREPARE_ONLINE /
- * MEM_FINISH_OFFLINE notifications and make the memory accessible.
- *
- * This flag is only relevant when used along with MHP_MEMMAP_ON_MEMORY,
- * because the altmap cannot be written (e.g., poisoned) when adding
- * memory -- before it is set online.
- *
- * This allows for adding memory with an altmap that is not currently
- * made available by a hypervisor. When onlining that memory, the
- * hypervisor can be instructed to make that memory available, and
- * the onlining phase will not require any memory allocations, which is
- * helpful in low-memory situations.
- */
-#define MHP_OFFLINE_INACCESSIBLE ((__force mhp_t)BIT(3))
/*
* Extended parameters for memory hotplug:
@@ -123,7 +107,7 @@ extern void adjust_present_page_count(struct page *page,
long nr_pages);
/* VM interface that may be used by firmware interface */
extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
- struct zone *zone, bool mhp_off_inaccessible);
+ struct zone *zone);
extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
extern int online_pages(unsigned long pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index e5951ba12a28..30c7aecbd245 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -25,7 +25,6 @@ struct vmem_altmap {
unsigned long free;
unsigned long align;
unsigned long alloc;
- bool inaccessible;
};
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 170594b5cb6b..8dc0a07570cc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2408,31 +2408,6 @@ struct zap_details {
/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))
-#ifdef CONFIG_SCHED_MM_CID
-void sched_mm_cid_before_execve(struct task_struct *t);
-void sched_mm_cid_after_execve(struct task_struct *t);
-void sched_mm_cid_fork(struct task_struct *t);
-void sched_mm_cid_exit_signals(struct task_struct *t);
-static inline int task_mm_cid(struct task_struct *t)
-{
- return t->mm_cid;
-}
-#else
-static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
-static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
-static inline void sched_mm_cid_fork(struct task_struct *t) { }
-static inline void sched_mm_cid_exit_signals(struct task_struct *t) { }
-static inline int task_mm_cid(struct task_struct *t)
-{
- /*
- * Use the processor id as a fall-back when the mm cid feature is
- * disabled. This provides functional per-cpu data structure accesses
- * in user-space, althrough it won't provide the memory usage benefits.
- */
- return raw_smp_processor_id();
-}
-#endif
-
#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
#else
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 90e5790c318f..3b7d05e7169c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -20,6 +20,7 @@
#include <linux/seqlock.h>
#include <linux/percpu_counter.h>
#include <linux/types.h>
+#include <linux/rseq_types.h>
#include <linux/bitmap.h>
#include <asm/mmu.h>
@@ -922,14 +923,6 @@ struct vm_area_struct {
#define vma_policy(vma) NULL
#endif
-#ifdef CONFIG_SCHED_MM_CID
-struct mm_cid {
- u64 time;
- int cid;
- int recent_cid;
-};
-#endif
-
/*
* Opaque type representing current mm_struct flag state. Must be accessed via
* mm_flags_xxx() helper functions.
@@ -991,44 +984,9 @@ struct mm_struct {
*/
atomic_t mm_users;
-#ifdef CONFIG_SCHED_MM_CID
- /**
- * @pcpu_cid: Per-cpu current cid.
- *
- * Keep track of the currently allocated mm_cid for each cpu.
- * The per-cpu mm_cid values are serialized by their respective
- * runqueue locks.
- */
- struct mm_cid __percpu *pcpu_cid;
- /*
- * @mm_cid_next_scan: Next mm_cid scan (in jiffies).
- *
- * When the next mm_cid scan is due (in jiffies).
- */
- unsigned long mm_cid_next_scan;
- /**
- * @nr_cpus_allowed: Number of CPUs allowed for mm.
- *
- * Number of CPUs allowed in the union of all mm's
- * threads allowed CPUs.
- */
- unsigned int nr_cpus_allowed;
- /**
- * @max_nr_cid: Maximum number of allowed concurrency
- * IDs allocated.
- *
- * Track the highest number of allowed concurrency IDs
- * allocated for the mm.
- */
- atomic_t max_nr_cid;
- /**
- * @cpus_allowed_lock: Lock protecting mm cpus_allowed.
- *
- * Provide mutual exclusion for mm cpus_allowed and
- * mm nr_cpus_allowed updates.
- */
- raw_spinlock_t cpus_allowed_lock;
-#endif
+ /* MM CID related storage */
+ struct mm_mm_cid mm_cid;
+
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* size of all page tables */
#endif
@@ -1370,37 +1328,6 @@ static inline void vma_iter_init(struct vma_iterator *vmi,
}
#ifdef CONFIG_SCHED_MM_CID
-
-enum mm_cid_state {
- MM_CID_UNSET = -1U, /* Unset state has lazy_put flag set. */
- MM_CID_LAZY_PUT = (1U << 31),
-};
-
-static inline bool mm_cid_is_unset(int cid)
-{
- return cid == MM_CID_UNSET;
-}
-
-static inline bool mm_cid_is_lazy_put(int cid)
-{
- return !mm_cid_is_unset(cid) && (cid & MM_CID_LAZY_PUT);
-}
-
-static inline bool mm_cid_is_valid(int cid)
-{
- return !(cid & MM_CID_LAZY_PUT);
-}
-
-static inline int mm_cid_set_lazy_put(int cid)
-{
- return cid | MM_CID_LAZY_PUT;
-}
-
-static inline int mm_cid_clear_lazy_put(int cid)
-{
- return cid & ~MM_CID_LAZY_PUT;
-}
-
/*
* mm_cpus_allowed: Union of all mm's threads allowed CPUs.
*/
@@ -1415,37 +1342,21 @@ static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm)
}
/* Accessor for struct mm_struct's cidmask. */
-static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
+static inline unsigned long *mm_cidmask(struct mm_struct *mm)
{
unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm);
/* Skip mm_cpus_allowed */
cid_bitmap += cpumask_size();
- return (struct cpumask *)cid_bitmap;
+ return (unsigned long *)cid_bitmap;
}
-static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
-{
- int i;
-
- for_each_possible_cpu(i) {
- struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i);
-
- pcpu_cid->cid = MM_CID_UNSET;
- pcpu_cid->recent_cid = MM_CID_UNSET;
- pcpu_cid->time = 0;
- }
- mm->nr_cpus_allowed = p->nr_cpus_allowed;
- atomic_set(&mm->max_nr_cid, 0);
- raw_spin_lock_init(&mm->cpus_allowed_lock);
- cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
- cpumask_clear(mm_cidmask(mm));
-}
+void mm_init_cid(struct mm_struct *mm, struct task_struct *p);
static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p)
{
- mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid);
- if (!mm->pcpu_cid)
+ mm->mm_cid.pcpu = alloc_percpu_noprof(struct mm_cid_pcpu);
+ if (!mm->mm_cid.pcpu)
return -ENOMEM;
mm_init_cid(mm, p);
return 0;
@@ -1454,37 +1365,24 @@ static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *
static inline void mm_destroy_cid(struct mm_struct *mm)
{
- free_percpu(mm->pcpu_cid);
- mm->pcpu_cid = NULL;
+ free_percpu(mm->mm_cid.pcpu);
+ mm->mm_cid.pcpu = NULL;
}
static inline unsigned int mm_cid_size(void)
{
- return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */
+ /* mm_cpus_allowed(), mm_cidmask(). */
+ return cpumask_size() + bitmap_size(num_possible_cpus());
}
-static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask)
-{
- struct cpumask *mm_allowed = mm_cpus_allowed(mm);
-
- if (!mm)
- return;
- /* The mm_cpus_allowed is the union of each thread allowed CPUs masks. */
- raw_spin_lock(&mm->cpus_allowed_lock);
- cpumask_or(mm_allowed, mm_allowed, cpumask);
- WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed));
- raw_spin_unlock(&mm->cpus_allowed_lock);
-}
#else /* CONFIG_SCHED_MM_CID */
static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { }
static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; }
static inline void mm_destroy_cid(struct mm_struct *mm) { }
-
static inline unsigned int mm_cid_size(void)
{
return 0;
}
-static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { }
#endif /* CONFIG_SCHED_MM_CID */
struct mmu_gather;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index d415dd15a0a9..8003e3218c46 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -701,9 +701,6 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
void pci_msi_mask_irq(struct irq_data *data);
void pci_msi_unmask_irq(struct irq_data *data);
-struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
- struct msi_domain_info *info,
- struct irq_domain *parent);
u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev);
u32 pci_msi_map_rid_ctlr_node(struct pci_dev *pdev, struct device_node **node);
struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 1db8543dfc8a..1c2bc0281807 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -43,6 +43,8 @@ extern int of_irq_parse_one(struct device_node *device, int index,
struct of_phandle_args *out_irq);
extern int of_irq_count(struct device_node *dev);
extern int of_irq_get(struct device_node *dev, int index);
+extern const struct cpumask *of_irq_get_affinity(struct device_node *dev,
+ int index);
extern int of_irq_get_byname(struct device_node *dev, const char *name);
extern int of_irq_to_resource_table(struct device_node *dev,
struct resource *res, int nr_irqs);
@@ -76,6 +78,11 @@ static inline int of_irq_get_byname(struct device_node *dev, const char *name)
{
return 0;
}
+static inline const struct cpumask *of_irq_get_affinity(struct device_node *dev,
+ int index)
+{
+ return NULL;
+}
static inline int of_irq_to_resource_table(struct device_node *dev,
struct resource *res, int nr_irqs)
{
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 12d90360f6db..43c854a273c3 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -52,7 +52,7 @@
__section(".discard") __attribute__((unused))
/*
- * s390 and alpha modules require percpu variables to be defined as
+ * alpha modules require percpu variables to be defined as
* weak to force the compiler to generate GOT based external
* references for them. This is necessary because percpu sections
* will be located outside of the usually addressable area.
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 93c9a26492fc..52b37f7bdbf9 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -119,6 +119,7 @@ struct arm_pmu {
/* PMUv3 only */
int pmuver;
+ bool has_smt;
u64 reg_pmmir;
u64 reg_brbidr;
#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
@@ -132,8 +133,6 @@ struct arm_pmu {
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-DECLARE_PER_CPU(struct arm_pmu *, cpu_armpmu);
-
u64 armpmu_event_update(struct perf_event *event);
int armpmu_event_set_period(struct perf_event *event);
@@ -190,8 +189,8 @@ bool arm_pmu_irq_is_nmi(void);
struct arm_pmu *armpmu_alloc(void);
void armpmu_free(struct arm_pmu *pmu);
int armpmu_register(struct arm_pmu *pmu);
-int armpmu_request_irq(int irq, int cpu);
-void armpmu_free_irq(int irq, int cpu);
+int armpmu_request_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu);
+void armpmu_free_irq(struct arm_pmu * __percpu *armpmu, int irq, int cpu);
#define ARMV8_PMU_PDEV_NAME "armv8-pmu"
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 32e8457ad535..ee3148ef87f6 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1232,6 +1232,10 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
#define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address)
#endif
+#ifndef flush_tlb_fix_spurious_fault_pmd
+#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) do { } while (0)
+#endif
+
/*
* When walking page tables, get the address of the next boundary,
* or the end address of the range if that comes earlier. Although no
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 074754c23d33..93c945331f39 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -102,6 +102,8 @@ devm_platform_ioremap_resource_byname(struct platform_device *pdev,
extern int platform_get_irq(struct platform_device *, unsigned int);
extern int platform_get_irq_optional(struct platform_device *, unsigned int);
+extern int platform_get_irq_affinity(struct platform_device *, unsigned int,
+ const struct cpumask **);
extern int platform_irq_count(struct platform_device *);
extern int devm_platform_get_irqs_affinity(struct platform_device *dev,
struct irq_affinity *affd,
@@ -232,6 +234,7 @@ extern int platform_device_add_data(struct platform_device *pdev,
extern int platform_device_add(struct platform_device *pdev);
extern void platform_device_del(struct platform_device *pdev);
extern void platform_device_put(struct platform_device *pdev);
+DEFINE_FREE(platform_device_put, struct platform_device *, if (_T) platform_device_put(_T))
struct platform_driver {
int (*probe)(struct platform_device *);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index cc7b2dc28574..7f69f739f613 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -25,11 +25,12 @@ extern void (*pm_power_off)(void);
struct device; /* we have a circular dep with device.h */
#ifdef CONFIG_VT_CONSOLE_SLEEP
-extern void pm_vt_switch_required(struct device *dev, bool required);
+extern int pm_vt_switch_required(struct device *dev, bool required);
extern void pm_vt_switch_unregister(struct device *dev);
#else
-static inline void pm_vt_switch_required(struct device *dev, bool required)
+static inline int pm_vt_switch_required(struct device *dev, bool required)
{
+ return 0;
}
static inline void pm_vt_switch_unregister(struct device *dev)
{
@@ -507,6 +508,7 @@ const struct dev_pm_ops name = { \
* RECOVER Creation of a hibernation image or restoration of the main
* memory contents from a hibernation image has failed, call
* ->thaw() and ->complete() for all devices.
+ * POWEROFF System will power off, call ->poweroff() for all devices.
*
* The following PM_EVENT_ messages are defined for internal use by
* kernel subsystems. They are never issued by the PM core.
@@ -537,6 +539,7 @@ const struct dev_pm_ops name = { \
#define PM_EVENT_USER 0x0100
#define PM_EVENT_REMOTE 0x0200
#define PM_EVENT_AUTO 0x0400
+#define PM_EVENT_POWEROFF 0x0800
#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE)
#define PM_EVENT_USER_SUSPEND (PM_EVENT_USER | PM_EVENT_SUSPEND)
@@ -551,6 +554,7 @@ const struct dev_pm_ops name = { \
#define PMSG_QUIESCE ((struct pm_message){ .event = PM_EVENT_QUIESCE, })
#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, })
#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, })
+#define PMSG_POWEROFF ((struct pm_message){ .event = PM_EVENT_POWEROFF, })
#define PMSG_RESUME ((struct pm_message){ .event = PM_EVENT_RESUME, })
#define PMSG_THAW ((struct pm_message){ .event = PM_EVENT_THAW, })
#define PMSG_RESTORE ((struct pm_message){ .event = PM_EVENT_RESTORE, })
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index f67a2cb7d781..93ba0143ca47 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -153,6 +153,7 @@ enum genpd_sync_state {
};
struct dev_power_governor {
+ bool (*system_power_down_ok)(struct dev_pm_domain *domain);
bool (*power_down_ok)(struct dev_pm_domain *domain);
bool (*suspend_ok)(struct device *dev);
};
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 4a69d4af3ff8..6cea4455f867 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -162,6 +162,15 @@ static inline void cpu_latency_qos_update_request(struct pm_qos_request *req,
static inline void cpu_latency_qos_remove_request(struct pm_qos_request *req) {}
#endif
+#ifdef CONFIG_PM_QOS_CPU_SYSTEM_WAKEUP
+s32 cpu_wakeup_latency_qos_limit(void);
+#else
+static inline s32 cpu_wakeup_latency_qos_limit(void)
+{
+ return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT;
+}
+#endif
+
#ifdef CONFIG_PM
enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask);
enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask);
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 0b436e15f4cd..911d7a4d32c1 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -637,6 +637,30 @@ DEFINE_GUARD_COND(pm_runtime_active_auto, _try,
DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled,
pm_runtime_resume_and_get(_T), _RET == 0)
+/* ACQUIRE() wrapper macros for the guards defined above. */
+
+#define PM_RUNTIME_ACQUIRE(_dev, _var) \
+ ACQUIRE(pm_runtime_active_try, _var)(_dev)
+
+#define PM_RUNTIME_ACQUIRE_AUTOSUSPEND(_dev, _var) \
+ ACQUIRE(pm_runtime_active_auto_try, _var)(_dev)
+
+#define PM_RUNTIME_ACQUIRE_IF_ENABLED(_dev, _var) \
+ ACQUIRE(pm_runtime_active_try_enabled, _var)(_dev)
+
+#define PM_RUNTIME_ACQUIRE_IF_ENABLED_AUTOSUSPEND(_dev, _var) \
+ ACQUIRE(pm_runtime_active_auto_try_enabled, _var)(_dev)
+
+/*
+ * ACQUIRE_ERR() wrapper macro for guard pm_runtime_active.
+ *
+ * Always check PM_RUNTIME_ACQUIRE_ERR() after using one of the
+ * PM_RUNTIME_ACQUIRE*() macros defined above (yes, it can be used with
+ * any of them) and if it is nonzero, avoid accessing the given device.
+ */
+#define PM_RUNTIME_ACQUIRE_ERR(_var_ptr) \
+ ACQUIRE_ERR(pm_runtime_active, _var_ptr)
+
/**
* pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0.
* @dev: Target device.
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index f2ed5b72b3d6..ff7dcc3fa105 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -47,10 +47,4 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
state->s4 = __seed(i, 128U);
}
-/* Pseudo random number generator from numerical recipes. */
-static inline u32 next_pseudo_random32(u32 seed)
-{
- return seed * 1664525 + 1013904223;
-}
-
#endif
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 102202185d7a..d964f965c8ff 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -134,11 +134,9 @@ static __always_inline unsigned char interrupt_context_level(void)
/*
* The following macros are deprecated and should not be used in new code:
- * in_irq() - Obsolete version of in_hardirq()
* in_softirq() - We have BH disabled, or are processing softirqs
* in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled
*/
-#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
diff --git a/include/linux/prmt.h b/include/linux/prmt.h
index c53ab287e932..8cdc987de963 100644
--- a/include/linux/prmt.h
+++ b/include/linux/prmt.h
@@ -4,9 +4,11 @@
#ifdef CONFIG_ACPI_PRMT
void init_prmt(void);
+bool acpi_prm_handler_available(const guid_t *handler_guid);
int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer);
#else
static inline void init_prmt(void) { }
+static inline bool acpi_prm_handler_available(const guid_t *handler_guid) { return false; }
static inline int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer)
{
return -EOPNOTSUPP;
diff --git a/include/linux/random.h b/include/linux/random.h
index 333cecfca93f..8a8064dc3970 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -130,21 +130,6 @@ static inline int get_random_bytes_wait(void *buf, size_t nbytes)
return ret;
}
-#define declare_get_random_var_wait(name, ret_type) \
- static inline int get_random_ ## name ## _wait(ret_type *out) { \
- int ret = wait_for_random_bytes(); \
- if (unlikely(ret)) \
- return ret; \
- *out = get_random_ ## name(); \
- return 0; \
- }
-declare_get_random_var_wait(u8, u8)
-declare_get_random_var_wait(u16, u16)
-declare_get_random_var_wait(u32, u32)
-declare_get_random_var_wait(u64, u32)
-declare_get_random_var_wait(long, unsigned long)
-#undef declare_get_random_var
-
#ifdef CONFIG_SMP
int random_prepare_cpu(unsigned int cpu);
int random_online_cpu(unsigned int cpu);
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index a7d92718b653..54701668b3df 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -206,6 +206,8 @@ struct rdt_mon_domain {
* @arch_has_sparse_bitmasks: True if a bitmask like f00f is valid.
* @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
* level has CPU scope.
+ * @io_alloc_capable: True if a portion of the cache can be configured
+ * for I/O traffic.
*/
struct resctrl_cache {
unsigned int cbm_len;
@@ -213,6 +215,7 @@ struct resctrl_cache {
unsigned int shareable_bits;
bool arch_has_sparse_bitmasks;
bool arch_has_per_cpu_cfg;
+ bool io_alloc_capable;
};
/**
@@ -654,6 +657,27 @@ void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid, int cntr_id,
enum resctrl_event_id eventid);
+/**
+ * resctrl_arch_io_alloc_enable() - Enable/disable io_alloc feature.
+ * @r: The resctrl resource.
+ * @enable: Enable (true) or disable (false) io_alloc on resource @r.
+ *
+ * This can be called from any CPU.
+ *
+ * Return:
+ * 0 on success, <0 on error.
+ */
+int resctrl_arch_io_alloc_enable(struct rdt_resource *r, bool enable);
+
+/**
+ * resctrl_arch_get_io_alloc_enabled() - Get io_alloc feature state.
+ * @r: The resctrl resource.
+ *
+ * Return:
+ * true if io_alloc is enabled or false if disabled.
+ */
+bool resctrl_arch_get_io_alloc_enabled(struct rdt_resource *r);
+
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 7e50bbc94e47..36ddfa1ec301 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -43,7 +43,7 @@ struct restart_block {
struct __kernel_timespec __user *rmtp;
struct old_timespec32 __user *compat_rmtp;
};
- u64 expires;
+ ktime_t expires;
} nanosleep;
/* For poll */
struct {
diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h
index e0135e0adae0..bf92227c78d0 100644
--- a/include/linux/resume_user_mode.h
+++ b/include/linux/resume_user_mode.h
@@ -59,7 +59,7 @@ static inline void resume_user_mode_work(struct pt_regs *regs)
mem_cgroup_handle_over_high(GFP_KERNEL);
blkcg_maybe_throttle_current();
- rseq_handle_notify_resume(NULL, regs);
+ rseq_handle_slowpath(regs);
}
#endif /* LINUX_RESUME_USER_MODE_H */
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 69553e7c14c1..2266f4dc77b6 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -3,134 +3,164 @@
#define _LINUX_RSEQ_H
#ifdef CONFIG_RSEQ
-
-#include <linux/preempt.h>
#include <linux/sched.h>
-#ifdef CONFIG_MEMBARRIER
-# define RSEQ_EVENT_GUARD irq
-#else
-# define RSEQ_EVENT_GUARD preempt
-#endif
-
-/*
- * Map the event mask on the user-space ABI enum rseq_cs_flags
- * for direct mask checks.
- */
-enum rseq_event_mask_bits {
- RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
- RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
- RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
-};
-
-enum rseq_event_mask {
- RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
- RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
- RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
-};
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
- if (t->rseq)
- set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
-}
+#include <uapi/linux/rseq.h>
-void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
+void __rseq_handle_slowpath(struct pt_regs *regs);
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
- struct pt_regs *regs)
+/* Invoked from resume_user_mode_work() */
+static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
- if (current->rseq)
- __rseq_handle_notify_resume(ksig, regs);
+ if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
+ if (current->rseq.event.slowpath)
+ __rseq_handle_slowpath(regs);
+ } else {
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
+ __rseq_handle_slowpath(regs);
+ }
}
-static inline void rseq_signal_deliver(struct ksignal *ksig,
- struct pt_regs *regs)
-{
- scoped_guard(RSEQ_EVENT_GUARD)
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
- rseq_handle_notify_resume(ksig, regs);
-}
+void __rseq_signal_deliver(int sig, struct pt_regs *regs);
-/* rseq_preempt() requires preemption to be disabled. */
-static inline void rseq_preempt(struct task_struct *t)
+/*
+ * Invoked from signal delivery to fixup based on the register context before
+ * switching to the signal delivery context.
+ */
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
- __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /* '&' is intentional to spare one conditional branch */
+ if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
+ __rseq_signal_deliver(ksig->sig, regs);
+ } else {
+ if (current->rseq.event.has_rseq)
+ __rseq_signal_deliver(ksig->sig, regs);
+ }
}
-/* rseq_migrate() requires preemption to be disabled. */
-static inline void rseq_migrate(struct task_struct *t)
+static inline void rseq_raise_notify_resume(struct task_struct *t)
{
- __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
+ set_tsk_thread_flag(t, TIF_RSEQ);
}
-/*
- * If parent process has a registered restartable sequences area, the
- * child inherits. Unregister rseq for a clone with CLONE_VM set.
- */
-static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
+/* Invoked from context switch to force evaluation on exit to user */
+static __always_inline void rseq_sched_switch_event(struct task_struct *t)
{
- if (clone_flags & CLONE_VM) {
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_event_mask = 0;
+ struct rseq_event *ev = &t->rseq.event;
+
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /*
+ * Avoid a boat load of conditionals by using simple logic
+ * to determine whether NOTIFY_RESUME needs to be raised.
+ *
+ * It's required when the CPU or MM CID has changed or
+ * the entry was from user space.
+ */
+ bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
+
+ if (raise) {
+ ev->sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
} else {
- t->rseq = current->rseq;
- t->rseq_len = current->rseq_len;
- t->rseq_sig = current->rseq_sig;
- t->rseq_event_mask = current->rseq_event_mask;
+ if (ev->has_rseq) {
+ t->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(t);
+ }
}
}
-static inline void rseq_execve(struct task_struct *t)
+/*
+ * Invoked from __set_task_cpu() when a task migrates or from
+ * mm_cid_schedin() when the CID changes to enforce an IDs update.
+ *
+ * This does not raise TIF_NOTIFY_RESUME as that happens in
+ * rseq_sched_switch_event().
+ */
+static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
{
- t->rseq = NULL;
- t->rseq_len = 0;
- t->rseq_sig = 0;
- t->rseq_event_mask = 0;
+ t->rseq.event.ids_changed = true;
}
-#else
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
-}
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
- struct pt_regs *regs)
+/* Enforce a full update after RSEQ registration and when execve() failed */
+static inline void rseq_force_update(void)
{
+ if (current->rseq.event.has_rseq) {
+ current->rseq.event.ids_changed = true;
+ current->rseq.event.sched_switch = true;
+ rseq_raise_notify_resume(current);
+ }
}
-static inline void rseq_signal_deliver(struct ksignal *ksig,
- struct pt_regs *regs)
+
+/*
+ * KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
+ * which clears TIF_NOTIFY_RESUME on architectures that don't use the
+ * generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
+ *
+ * In that case the user space RSEQ update is skipped, as it would have to be
+ * repeated before returning to user space anyway: __rseq_handle_slowpath()
+ * does nothing when invoked with NULL register state.
+ *
+ * After returning from guest mode, before exiting to userspace, hypervisors
+ * must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
+ */
+static inline void rseq_virt_userspace_exit(void)
{
+ /*
+ * The generic optimization for deferring RSEQ updates until the next
+ * exit relies on having a dedicated TIF_RSEQ.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
+ current->rseq.event.sched_switch)
+ rseq_raise_notify_resume(current);
}
-static inline void rseq_preempt(struct task_struct *t)
+
+static inline void rseq_reset(struct task_struct *t)
{
+ memset(&t->rseq, 0, sizeof(t->rseq));
+ t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
}
-static inline void rseq_migrate(struct task_struct *t)
+
+static inline void rseq_execve(struct task_struct *t)
{
+ rseq_reset(t);
}
+
+/*
+ * If parent process has a registered restartable sequences area, the
+ * child inherits. Unregister rseq for a clone with CLONE_VM set.
+ *
+ * On fork, keep the IDs (CPU, MMCID) of the parent, which avoids a fault
+ * on the COW page on exit to user space, when the child stays on the same
+ * CPU as the parent. That's obviously not guaranteed, but in overcommit
+ * scenarios it is more likely and optimizes for the fork/exec case without
+ * taking the fault.
+ */
static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
{
-}
-static inline void rseq_execve(struct task_struct *t)
-{
+ if (clone_flags & CLONE_VM)
+ rseq_reset(t);
+ else
+ t->rseq = current->rseq;
}
-#endif
+#else /* CONFIG_RSEQ */
+static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
+static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
+static inline void rseq_sched_switch_event(struct task_struct *t) { }
+static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
+static inline void rseq_force_update(void) { }
+static inline void rseq_virt_userspace_exit(void) { }
+static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
+static inline void rseq_execve(struct task_struct *t) { }
+#endif /* !CONFIG_RSEQ */
#ifdef CONFIG_DEBUG_RSEQ
-
void rseq_syscall(struct pt_regs *regs);
-
-#else
-
-static inline void rseq_syscall(struct pt_regs *regs)
-{
-}
-
-#endif
+#else /* CONFIG_DEBUG_RSEQ */
+static inline void rseq_syscall(struct pt_regs *regs) { }
+#endif /* !CONFIG_DEBUG_RSEQ */
#endif /* _LINUX_RSEQ_H */
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
new file mode 100644
index 000000000000..c92167ff8a7f
--- /dev/null
+++ b/include/linux/rseq_entry.h
@@ -0,0 +1,616 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSEQ_ENTRY_H
+#define _LINUX_RSEQ_ENTRY_H
+
+/* Must be outside the CONFIG_RSEQ guard to resolve the stubs */
+#ifdef CONFIG_RSEQ_STATS
+#include <linux/percpu.h>
+
+struct rseq_stats {
+ unsigned long exit;
+ unsigned long signal;
+ unsigned long slowpath;
+ unsigned long fastpath;
+ unsigned long ids;
+ unsigned long cs;
+ unsigned long clear;
+ unsigned long fixup;
+};
+
+DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
+
+/*
+ * Slow path has interrupts and preemption enabled, but the fast path
+ * runs with interrupts disabled so there is no point in having the
+ * preemption checks implied in __this_cpu_inc() for every operation.
+ */
+#ifdef RSEQ_BUILD_SLOW_PATH
+#define rseq_stat_inc(which) this_cpu_inc((which))
+#else
+#define rseq_stat_inc(which) raw_cpu_inc((which))
+#endif
+
+#else /* CONFIG_RSEQ_STATS */
+#define rseq_stat_inc(x) do { } while (0)
+#endif /* !CONFIG_RSEQ_STATS */
+
+#ifdef CONFIG_RSEQ
+#include <linux/jump_label.h>
+#include <linux/rseq.h>
+#include <linux/uaccess.h>
+
+#include <linux/tracepoint-defs.h>
+
+#ifdef CONFIG_TRACEPOINTS
+DECLARE_TRACEPOINT(rseq_update);
+DECLARE_TRACEPOINT(rseq_ip_fixup);
+void __rseq_trace_update(struct task_struct *t);
+void __rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
+ unsigned long offset, unsigned long abort_ip);
+
+static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids)
+{
+ if (tracepoint_enabled(rseq_update) && ids)
+ __rseq_trace_update(t);
+}
+
+static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
+ unsigned long offset, unsigned long abort_ip)
+{
+ if (tracepoint_enabled(rseq_ip_fixup))
+ __rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
+}
+
+#else /* CONFIG_TRACEPOINTS */
+static inline void rseq_trace_update(struct task_struct *t, struct rseq_ids *ids) { }
+static inline void rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
+ unsigned long offset, unsigned long abort_ip) { }
+#endif /* !CONFIG_TRACEPOINTS */
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
+
+#ifdef RSEQ_BUILD_SLOW_PATH
+#define rseq_inline
+#else
+#define rseq_inline __always_inline
+#endif
+
+bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
+bool rseq_debug_validate_ids(struct task_struct *t);
+
+static __always_inline void rseq_note_user_irq_entry(void)
+{
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
+ current->rseq.event.user_irq = true;
+}
+
+/*
+ * Check whether there is a valid critical section and whether the
+ * instruction pointer in @regs is inside the critical section.
+ *
+ * - If the critical section is invalid, terminate the task.
+ *
+ * - If valid and the instruction pointer is inside, set it to the abort IP.
+ *
+ * - If valid and the instruction pointer is outside, clear the critical
+ * section address.
+ *
+ * Returns true, if the section was valid and either fixup or clear was
+ * done, false otherwise.
+ *
+ * In the failure case task::rseq_event::fatal is set when an invalid
+ * section was found. It's clear when the failure was an unresolved page
+ * fault.
+ *
+ * If inlined into the exit to user path with interrupts disabled, the
+ * caller has to protect against page faults with pagefault_disable().
+ *
+ * In preemptible task context this would be counterproductive as the page
+ * faults could not be fully resolved. As a consequence unresolved page
+ * faults in task context are fatal too.
+ */
+
+#ifdef RSEQ_BUILD_SLOW_PATH
+/*
+ * The debug version is put out of line, but kept here so the code stays
+ * together.
+ *
+ * @csaddr has already been checked by the caller to be in user space
+ */
+bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs,
+ unsigned long csaddr)
+{
+ struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
+ u64 start_ip, abort_ip, offset, cs_end, head, tasksize = TASK_SIZE;
+ unsigned long ip = instruction_pointer(regs);
+ u64 __user *uc_head = (u64 __user *) ucs;
+ u32 usig, __user *uc_sig;
+
+ scoped_user_rw_access(ucs, efault) {
+ /*
+ * Evaluate the user pile and exit if one of the conditions
+ * is not fulfilled.
+ */
+ unsafe_get_user(start_ip, &ucs->start_ip, efault);
+ if (unlikely(start_ip >= tasksize))
+ goto die;
+ /* If outside, just clear the critical section. */
+ if (ip < start_ip)
+ goto clear;
+
+ unsafe_get_user(offset, &ucs->post_commit_offset, efault);
+ cs_end = start_ip + offset;
+ /* Check for overflow and wraparound */
+ if (unlikely(cs_end >= tasksize || cs_end < start_ip))
+ goto die;
+
+ /* If not inside, clear it. */
+ if (ip >= cs_end)
+ goto clear;
+
+ unsafe_get_user(abort_ip, &ucs->abort_ip, efault);
+ /* Ensure it's "valid" */
+ if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
+ goto die;
+ /* Validate that the abort IP is not in the critical section */
+ if (unlikely(abort_ip - start_ip < offset))
+ goto die;
+
+ /*
+ * Check version and flags for 0. No point in emitting
+ * deprecated warnings before dying. That could be done in
+ * the slow path eventually, but *shrug*.
+ */
+ unsafe_get_user(head, uc_head, efault);
+ if (unlikely(head))
+ goto die;
+
+ /* abort_ip - 4 is >= 0. See abort_ip check above */
+ uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
+ unsafe_get_user(usig, uc_sig, efault);
+ if (unlikely(usig != t->rseq.sig))
+ goto die;
+
+ /* rseq_event.user_irq is only valid if CONFIG_GENERIC_IRQ_ENTRY=y */
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ /* If not in interrupt from user context, let it die */
+ if (unlikely(!t->rseq.event.user_irq))
+ goto die;
+ }
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
+ instruction_pointer_set(regs, (unsigned long)abort_ip);
+ rseq_stat_inc(rseq_stats.fixup);
+ break;
+ clear:
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
+ rseq_stat_inc(rseq_stats.clear);
+ abort_ip = 0ULL;
+ }
+
+ if (unlikely(abort_ip))
+ rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
+ return true;
+die:
+ t->rseq.event.fatal = true;
+efault:
+ return false;
+}
+
+/*
+ * On debug kernels validate that user space did not mess with it if the
+ * debug branch is enabled.
+ */
+bool rseq_debug_validate_ids(struct task_struct *t)
+{
+ struct rseq __user *rseq = t->rseq.usrptr;
+ u32 cpu_id, uval, node_id;
+
+ /*
+ * On the first exit after registering the rseq region CPU ID is
+ * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0!
+ */
+ node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ?
+ cpu_to_node(t->rseq.ids.cpu_id) : 0;
+
+ scoped_user_read_access(rseq, efault) {
+ unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
+ if (cpu_id != t->rseq.ids.cpu_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->cpu_id, efault);
+ if (uval != cpu_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->node_id, efault);
+ if (uval != node_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->mm_cid, efault);
+ if (uval != t->rseq.ids.mm_cid)
+ goto die;
+ }
+ return true;
+die:
+ t->rseq.event.fatal = true;
+efault:
+ return false;
+}
+
+#endif /* RSEQ_BUILD_SLOW_PATH */
+
+/*
+ * This only ensures that abort_ip is in the user address space and
+ * validates that it is preceded by the signature.
+ *
+ * No other sanity checks are done here, that's what the debug code is for.
+ */
+static rseq_inline bool
+rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr)
+{
+ struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
+ unsigned long ip = instruction_pointer(regs);
+ unsigned long tasksize = TASK_SIZE;
+ u64 start_ip, abort_ip, offset;
+ u32 usig, __user *uc_sig;
+
+ rseq_stat_inc(rseq_stats.cs);
+
+ if (unlikely(csaddr >= tasksize)) {
+ t->rseq.event.fatal = true;
+ return false;
+ }
+
+ if (static_branch_unlikely(&rseq_debug_enabled))
+ return rseq_debug_update_user_cs(t, regs, csaddr);
+
+ scoped_user_rw_access(ucs, efault) {
+ unsafe_get_user(start_ip, &ucs->start_ip, efault);
+ unsafe_get_user(offset, &ucs->post_commit_offset, efault);
+ unsafe_get_user(abort_ip, &ucs->abort_ip, efault);
+
+ /*
+ * No sanity checks. If user space screwed it up, it can
+ * keep the pieces. That's what debug code is for.
+ *
+ * If outside, just clear the critical section.
+ */
+ if (ip - start_ip >= offset)
+ goto clear;
+
+ /*
+ * Two requirements for @abort_ip:
+ * - Must be in user space as x86 IRET would happily return to
+ * the kernel.
+ * - The four bytes preceding the instruction at @abort_ip must
+ * contain the signature.
+ *
+ * The latter protects against the following attack vector:
+ *
+ * An attacker with limited abilities to write, creates a critical
+ * section descriptor, sets the abort IP to a library function or
+ * some other ROP gadget and stores the address of the descriptor
+ * in TLS::rseq::rseq_cs. An RSEQ abort would then evade ROP
+ * protection.
+ */
+ if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
+ goto die;
+
+ /* The address is guaranteed to be >= 0 and < TASK_SIZE */
+ uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
+ unsafe_get_user(usig, uc_sig, efault);
+ if (unlikely(usig != t->rseq.sig))
+ goto die;
+
+ /* Invalidate the critical section */
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
+ /* Update the instruction pointer */
+ instruction_pointer_set(regs, (unsigned long)abort_ip);
+ rseq_stat_inc(rseq_stats.fixup);
+ break;
+ clear:
+ unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
+ rseq_stat_inc(rseq_stats.clear);
+ abort_ip = 0ULL;
+ }
+
+ if (unlikely(abort_ip))
+ rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
+ return true;
+die:
+ t->rseq.event.fatal = true;
+efault:
+ return false;
+}
+
+/*
+ * Updates CPU ID, Node ID and MM CID and reads the critical section
+ * address, when @csaddr != NULL. This allows putting the ID update and the
+ * read under the same uaccess region to spare a separate begin/end.
+ *
+ * As this is either invoked from a C wrapper with @csaddr = NULL or from
+ * the fast path code with a valid pointer, a clever compiler should be
+ * able to optimize the read out. Spares a duplicate implementation.
+ *
+ * Returns true, if the operation was successful, false otherwise.
+ *
+ * In the failure case task::rseq_event::fatal is set when invalid data
+ * was found on debug kernels. It's clear when the failure was an unresolved page
+ * fault.
+ *
+ * If inlined into the exit to user path with interrupts disabled, the
+ * caller has to protect against page faults with pagefault_disable().
+ *
+ * In preemptible task context this would be counterproductive as the page
+ * faults could not be fully resolved. As a consequence unresolved page
+ * faults in task context are fatal too.
+ */
+static rseq_inline
+bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
+ u32 node_id, u64 *csaddr)
+{
+ struct rseq __user *rseq = t->rseq.usrptr;
+
+ if (static_branch_unlikely(&rseq_debug_enabled)) {
+ if (!rseq_debug_validate_ids(t))
+ return false;
+ }
+
+ scoped_user_rw_access(rseq, efault) {
+ unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault);
+ unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault);
+ unsafe_put_user(node_id, &rseq->node_id, efault);
+ unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault);
+ if (csaddr)
+ unsafe_get_user(*csaddr, &rseq->rseq_cs, efault);
+ }
+
+ /* Cache the new values */
+ t->rseq.ids.cpu_cid = ids->cpu_cid;
+ rseq_stat_inc(rseq_stats.ids);
+ rseq_trace_update(t, ids);
+ return true;
+efault:
+ return false;
+}
+
+/*
+ * Update user space with new IDs and conditionally check whether the task
+ * is in a critical section.
+ */
+static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs,
+ struct rseq_ids *ids, u32 node_id)
+{
+ u64 csaddr;
+
+ if (!rseq_set_ids_get_csaddr(t, ids, node_id, &csaddr))
+ return false;
+
+ /*
+ * On architectures which utilize the generic entry code this
+ * allows skipping the critical section when the entry was not from
+ * a user space interrupt, unless debug mode is enabled.
+ */
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
+ if (!static_branch_unlikely(&rseq_debug_enabled)) {
+ if (likely(!t->rseq.event.user_irq))
+ return true;
+ }
+ }
+ if (likely(!csaddr))
+ return true;
+ /* Sigh, this really needs to do work */
+ return rseq_update_user_cs(t, regs, csaddr);
+}
+
+/*
+ * If you want to use this then convert your architecture to the generic
+ * entry code. I'm tired of building workarounds for people who can't be
+ * bothered to make the maintenance of generic infrastructure less
+ * burdensome. Just sucking everything into the architecture code and
+ * thereby making others chase the horrible hacks and keep them working is
+ * neither acceptable nor sustainable.
+ */
+#ifdef CONFIG_GENERIC_ENTRY
+
+/*
+ * This is inlined into the exit path because:
+ *
+ * 1) It's a one time comparison in the fast path when there is no event to
+ * handle
+ *
+ * 2) The access to the user space rseq memory (TLS) is unlikely to fault
+ * so the straight inline operation is:
+ *
+ * - Four 32-bit stores only if CPU ID/ MM CID need to be updated
+ * - One 64-bit load to retrieve the critical section address
+ *
+ * 3) In the unlikely case that the critical section address is != NULL:
+ *
+ * - One 64-bit load to retrieve the start IP
+ * - One 64-bit load to retrieve the offset for calculating the end
+ * - One 64-bit load to retrieve the abort IP
+ * - One 32-bit load to retrieve the signature
+ * - One store to clear the critical section address
+ *
+ * The non-debug case implements only the minimal required checking. It
+ * provides protection against a rogue abort IP in kernel space, which
+ * would be exploitable at least on x86, and also against a rogue CS
+ * descriptor by checking the signature at the abort IP. Any fallout from
+ * invalid critical section descriptors is a user space problem. The debug
+ * case provides the full set of checks and terminates the task if a
+ * condition is not met.
+ *
+ * In case of a fault or an invalid value, this sets TIF_NOTIFY_RESUME and
+ * tells the caller to loop back into exit_to_user_mode_loop(). The rseq
+ * slow path there will handle the failure.
+ */
+static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct task_struct *t)
+{
+ /*
+ * Page faults need to be disabled as this is called with
+ * interrupts disabled
+ */
+ guard(pagefault)();
+ if (likely(!t->rseq.event.ids_changed)) {
+ struct rseq __user *rseq = t->rseq.usrptr;
+ /*
+ * If IDs have not changed rseq_event::user_irq must be true
+ * See rseq_sched_switch_event().
+ */
+ u64 csaddr;
+
+ if (unlikely(get_user_inline(csaddr, &rseq->rseq_cs)))
+ return false;
+
+ if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) {
+ if (unlikely(!rseq_update_user_cs(t, regs, csaddr)))
+ return false;
+ }
+ return true;
+ }
+
+ struct rseq_ids ids = {
+ .cpu_id = task_cpu(t),
+ .mm_cid = task_mm_cid(t),
+ };
+ u32 node_id = cpu_to_node(ids.cpu_id);
+
+ return rseq_update_usr(t, regs, &ids, node_id);
+}
+
+static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs)
+{
+ struct task_struct *t = current;
+
+ /*
+ * If the task neither went through schedule nor got the flag enforced
+ * by the rseq syscall or execve, then there is nothing to do here.
+ *
+ * CPU ID and MM CID can only change when going through a context
+ * switch.
+ *
+ * rseq_sched_switch_event() sets the rseq_event::sched_switch bit
+ * only when rseq_event::has_rseq is true. That conditional is
+ * required to avoid setting the TIF bit if RSEQ is not registered
+ * for a task. rseq_event::sched_switch is cleared when RSEQ is
+ * unregistered by a task so it's sufficient to check for the
+ * sched_switch bit alone.
+ *
+ * A sane compiler requires three instructions for the nothing to do
+ * case including clearing the events, but your mileage might vary.
+ */
+ if (unlikely((t->rseq.event.sched_switch))) {
+ rseq_stat_inc(rseq_stats.fastpath);
+
+ if (unlikely(!rseq_exit_user_update(regs, t)))
+ return true;
+ }
+ /* Clear state so next entry starts from a clean slate */
+ t->rseq.event.events = 0;
+ return false;
+}
+
+/* Required to allow conversion to GENERIC_ENTRY w/o GENERIC_TIF_BITS */
+#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
+static __always_inline bool test_tif_rseq(unsigned long ti_work)
+{
+ return ti_work & _TIF_RSEQ;
+}
+
+static __always_inline void clear_tif_rseq(void)
+{
+ static_assert(TIF_RSEQ != TIF_NOTIFY_RESUME);
+ clear_thread_flag(TIF_RSEQ);
+}
+#else
+static __always_inline bool test_tif_rseq(unsigned long ti_work) { return true; }
+static __always_inline void clear_tif_rseq(void) { }
+#endif
+
+static __always_inline bool
+rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
+{
+ if (likely(!test_tif_rseq(ti_work)))
+ return false;
+
+ if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
+ current->rseq.event.slowpath = true;
+ set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+ return true;
+ }
+
+ clear_tif_rseq();
+ return false;
+}
+
+#else /* CONFIG_GENERIC_ENTRY */
+static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
+{
+ return false;
+}
+#endif /* !CONFIG_GENERIC_ENTRY */
+
+static __always_inline void rseq_syscall_exit_to_user_mode(void)
+{
+ struct rseq_event *ev = &current->rseq.event;
+
+ rseq_stat_inc(rseq_stats.exit);
+
+ /* Needed to remove the store for the !lockdep case */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ WARN_ON_ONCE(ev->sched_switch);
+ ev->events = 0;
+ }
+}
+
+static __always_inline void rseq_irqentry_exit_to_user_mode(void)
+{
+ struct rseq_event *ev = &current->rseq.event;
+
+ rseq_stat_inc(rseq_stats.exit);
+
+ lockdep_assert_once(!ev->sched_switch);
+
+ /*
+ * Ensure that event (especially user_irq) is cleared when the
+ * interrupt did not result in a schedule and therefore the
+ * rseq processing could not clear it.
+ */
+ ev->events = 0;
+}
+
+/* Required to keep ARM64 working */
+static __always_inline void rseq_exit_to_user_mode_legacy(void)
+{
+ struct rseq_event *ev = &current->rseq.event;
+
+ rseq_stat_inc(rseq_stats.exit);
+
+ if (static_branch_unlikely(&rseq_debug_enabled))
+ WARN_ON_ONCE(ev->sched_switch);
+
+ /*
+ * Ensure that event (especially user_irq) is cleared when the
+ * interrupt did not result in a schedule and therefore the
+ * rseq processing did not clear it.
+ */
+ ev->events = 0;
+}
+
+void __rseq_debug_syscall_return(struct pt_regs *regs);
+
+static inline void rseq_debug_syscall_return(struct pt_regs *regs)
+{
+ if (static_branch_unlikely(&rseq_debug_enabled))
+ __rseq_debug_syscall_return(regs);
+}
+#else /* CONFIG_RSEQ */
+static inline void rseq_note_user_irq_entry(void) { }
+static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
+{
+ return false;
+}
+static inline void rseq_syscall_exit_to_user_mode(void) { }
+static inline void rseq_irqentry_exit_to_user_mode(void) { }
+static inline void rseq_exit_to_user_mode_legacy(void) { }
+static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
+#endif /* !CONFIG_RSEQ */
+
+#endif /* _LINUX_RSEQ_ENTRY_H */
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
new file mode 100644
index 000000000000..332dc14b81c9
--- /dev/null
+++ b/include/linux/rseq_types.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSEQ_TYPES_H
+#define _LINUX_RSEQ_TYPES_H
+
+#include <linux/irq_work_types.h>
+#include <linux/types.h>
+#include <linux/workqueue_types.h>
+
+#ifdef CONFIG_RSEQ
+struct rseq;
+
+/**
+ * struct rseq_event - Storage for rseq related event management
+ * @all: Compound to initialize and clear the data efficiently
+ * @events: Compound to access events with a single load/store
+ * @sched_switch: True if the task was scheduled and needs update on
+ * exit to user
+ * @ids_changed: Indicator that IDs need to be updated
+ * @user_irq: True on interrupt entry from user mode
+ * @has_rseq: True if the task has a rseq pointer installed
+ * @error: Compound error code for the slow path to analyze
+ * @fatal: User space data corrupted or invalid
+ * @slowpath: Indicator that slow path processing via TIF_NOTIFY_RESUME
+ * is required
+ *
+ * @sched_switch and @ids_changed must be adjacent and the combo must be
+ * 16bit aligned to allow a single store, when both are set at the same
+ * time in the scheduler.
+ */
+struct rseq_event {
+ union {
+ u64 all;
+ struct {
+ union {
+ u32 events;
+ struct {
+ u8 sched_switch;
+ u8 ids_changed;
+ u8 user_irq;
+ };
+ };
+
+ u8 has_rseq;
+ u8 __pad;
+ union {
+ u16 error;
+ struct {
+ u8 fatal;
+ u8 slowpath;
+ };
+ };
+ };
+ };
+};
+
+/**
+ * struct rseq_ids - Cache for ids, which need to be updated
+ * @cpu_cid: Compound of @cpu_id and @mm_cid to make the
+ * compiler emit a single compare on 64-bit
+ * @cpu_id: The CPU ID which was written last to user space
+ * @mm_cid: The MM CID which was written last to user space
+ *
+ * @cpu_id and @mm_cid are updated when the data is written to user space.
+ */
+struct rseq_ids {
+ union {
+ u64 cpu_cid;
+ struct {
+ u32 cpu_id;
+ u32 mm_cid;
+ };
+ };
+};
+
+/**
+ * struct rseq_data - Storage for all rseq related data
+ * @usrptr: Pointer to the registered user space RSEQ memory
+ * @len: Length of the RSEQ region
+ * @sig: Signature of critical section abort IPs
+ * @event: Storage for event management
+ * @ids: Storage for cached CPU ID and MM CID
+ */
+struct rseq_data {
+ struct rseq __user *usrptr;
+ u32 len;
+ u32 sig;
+ struct rseq_event event;
+ struct rseq_ids ids;
+};
+
+#else /* CONFIG_RSEQ */
+struct rseq_data { };
+#endif /* !CONFIG_RSEQ */
+
+#ifdef CONFIG_SCHED_MM_CID
+
+#define MM_CID_UNSET BIT(31)
+#define MM_CID_ONCPU BIT(30)
+#define MM_CID_TRANSIT BIT(29)
+
+/**
+ * struct sched_mm_cid - Storage for per task MM CID data
+ * @active: MM CID is active for the task
+ * @cid: The CID associated to the task either permanently or
+ * borrowed from the CPU
+ */
+struct sched_mm_cid {
+ unsigned int active;
+ unsigned int cid;
+};
+
+/**
+ * struct mm_cid_pcpu - Storage for per CPU MM_CID data
+ * @cid: The CID associated to the CPU either permanently or
+ * while a task with a CID is running
+ */
+struct mm_cid_pcpu {
+ unsigned int cid;
+}____cacheline_aligned_in_smp;
+
+/**
+ * struct mm_mm_cid - Storage for per MM CID data
+ * @pcpu: Per CPU storage for CIDs associated to a CPU
+ * @percpu: Set, when CIDs are in per CPU mode
+ * @transit: Set to MM_CID_TRANSIT during a mode change transition phase
+ * @max_cids: The exclusive maximum CID value for allocation and convergence
+ * @irq_work: irq_work to handle the affinity mode change case
+ * @work: Regular work to handle the affinity mode change case
+ * @lock: Spinlock to protect against affinity setting which can't take @mutex
+ * @mutex: Mutex to serialize forks and exits related to this mm
+ * @nr_cpus_allowed: The number of CPUs in the per MM allowed CPUs map. The map
+ * is growth only.
+ * @users: The number of tasks sharing this MM. Separate from mm::mm_users
+ * as that is modified by mmget()/mmput() by other entities which
+ * do not actually share the MM.
+ * @pcpu_thrs: Threshold for switching back from per CPU mode
+ * @update_deferred: A deferred switch back to per task mode is pending.
+ */
+struct mm_mm_cid {
+ /* Hotpath read mostly members */
+ struct mm_cid_pcpu __percpu *pcpu;
+ unsigned int percpu;
+ unsigned int transit;
+ unsigned int max_cids;
+
+ /* Rarely used. Moves @lock and @mutex into the second cacheline */
+ struct irq_work irq_work;
+ struct work_struct work;
+
+ raw_spinlock_t lock;
+ struct mutex mutex;
+
+ /* Low frequency modified */
+ unsigned int nr_cpus_allowed;
+ unsigned int users;
+ unsigned int pcpu_thrs;
+ unsigned int update_deferred;
+}____cacheline_aligned_in_smp;
+#else /* CONFIG_SCHED_MM_CID */
+struct mm_mm_cid { };
+struct sched_mm_cid { };
+#endif /* !CONFIG_SCHED_MM_CID */
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e84bc5bce816..fac12bb7dbe4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -41,7 +41,7 @@
#include <linux/task_io_accounting.h>
#include <linux/posix-timers_types.h>
#include <linux/restart_block.h>
-#include <uapi/linux/rseq.h>
+#include <linux/rseq_types.h>
#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
#include <linux/rv.h>
@@ -1406,33 +1406,8 @@ struct task_struct {
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
-#ifdef CONFIG_RSEQ
- struct rseq __user *rseq;
- u32 rseq_len;
- u32 rseq_sig;
- /*
- * RmW on rseq_event_mask must be performed atomically
- * with respect to preemption.
- */
- unsigned long rseq_event_mask;
-# ifdef CONFIG_DEBUG_RSEQ
- /*
- * This is a place holder to save a copy of the rseq fields for
- * validation of read-only fields. The struct rseq has a
- * variable-length array at the end, so it cannot be used
- * directly. Reserve a size large enough for the known fields.
- */
- char rseq_fields[sizeof(struct rseq)];
-# endif
-#endif
-
-#ifdef CONFIG_SCHED_MM_CID
- int mm_cid; /* Current cid in mm */
- int last_mm_cid; /* Most recent cid in mm */
- int migrate_from_cpu;
- int mm_cid_active; /* Whether cid bitmap is active */
- struct callback_head cid_work;
-#endif
+ struct rseq_data rseq;
+ struct sched_mm_cid mm_cid;
struct tlbflush_unmap_batch tlb_ubc;
@@ -1901,6 +1876,7 @@ extern int sched_setscheduler(struct task_struct *, int, const struct sched_para
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern void sched_set_fifo(struct task_struct *p);
extern void sched_set_fifo_low(struct task_struct *p);
+extern void sched_set_fifo_secondary(struct task_struct *p);
extern void sched_set_normal(struct task_struct *p, int nice);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
@@ -2325,6 +2301,32 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
#define alloc_tag_restore(_tag, _old) do {} while (0)
#endif
+/* Avoids recursive inclusion hell */
+#ifdef CONFIG_SCHED_MM_CID
+void sched_mm_cid_before_execve(struct task_struct *t);
+void sched_mm_cid_after_execve(struct task_struct *t);
+void sched_mm_cid_fork(struct task_struct *t);
+void sched_mm_cid_exit(struct task_struct *t);
+static __always_inline int task_mm_cid(struct task_struct *t)
+{
+ return t->mm_cid.cid & ~(MM_CID_ONCPU | MM_CID_TRANSIT);
+}
+#else
+static inline void sched_mm_cid_before_execve(struct task_struct *t) { }
+static inline void sched_mm_cid_after_execve(struct task_struct *t) { }
+static inline void sched_mm_cid_fork(struct task_struct *t) { }
+static inline void sched_mm_cid_exit(struct task_struct *t) { }
+static __always_inline int task_mm_cid(struct task_struct *t)
+{
+ /*
+ * Use the processor id as a fall-back when the mm cid feature is
+ * disabled. This provides functional per-cpu data structure accesses
+ * in user-space, although it won't provide the memory usage benefits.
+ */
+ return task_cpu(t);
+}
+#endif
+
#ifndef MODULE
#ifndef COMPILE_OFFSETS
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index dd925d84fa46..b40de9bab4b7 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -67,6 +67,11 @@ enum syscall_work_bit {
#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
#endif
+#ifndef TIF_RSEQ
+# define TIF_RSEQ TIF_NOTIFY_RESUME
+# define _TIF_RSEQ _TIF_NOTIFY_RESUME
+#endif
+
#ifdef __KERNEL__
#ifndef arch_set_restart_data
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 0414d9e6b4fc..62e1cea71125 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -188,4 +188,13 @@ int timers_dead_cpu(unsigned int cpu);
#define timers_dead_cpu NULL
#endif
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+extern int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask);
+#else
+static inline int tmigr_isolated_exclude_cpumask(struct cpumask *exclude_cpumask)
+{
+ return 0;
+}
+#endif
+
#endif
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1beb5b395d81..be395f5f7ee3 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -2,6 +2,7 @@
#ifndef __LINUX_UACCESS_H__
#define __LINUX_UACCESS_H__
+#include <linux/cleanup.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/instrumented.h>
#include <linux/minmax.h>
@@ -35,9 +36,17 @@
#ifdef masked_user_access_begin
#define can_do_masked_user_access() 1
+# ifndef masked_user_write_access_begin
+# define masked_user_write_access_begin masked_user_access_begin
+# endif
+# ifndef masked_user_read_access_begin
+# define masked_user_read_access_begin masked_user_access_begin
+#endif
#else
#define can_do_masked_user_access() 0
#define masked_user_access_begin(src) NULL
+ #define masked_user_read_access_begin(src) NULL
+ #define masked_user_write_access_begin(src) NULL
#define mask_user_address(src) (src)
#endif
@@ -518,7 +527,34 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr,
long count);
long strnlen_user_nofault(const void __user *unsafe_addr, long count);
-#ifndef __get_kernel_nofault
+#ifdef arch_get_kernel_nofault
+/*
+ * Wrap the architecture implementation so that @label can be outside of a
+ * cleanup() scope. A regular C goto works correctly, but ASM goto does
+ * not. Clang rejects such an attempt, but GCC silently emits buggy code.
+ */
+#define __get_kernel_nofault(dst, src, type, label) \
+do { \
+ __label__ local_label; \
+ arch_get_kernel_nofault(dst, src, type, local_label); \
+ if (0) { \
+ local_label: \
+ goto label; \
+ } \
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, label) \
+do { \
+ __label__ local_label; \
+ arch_put_kernel_nofault(dst, src, type, local_label); \
+ if (0) { \
+ local_label: \
+ goto label; \
+ } \
+} while (0)
+
+#elif !defined(__get_kernel_nofault) /* arch_get_kernel_nofault */
+
#define __get_kernel_nofault(dst, src, type, label) \
do { \
type __user *p = (type __force __user *)(src); \
@@ -535,7 +571,8 @@ do { \
if (__put_user(data, p)) \
goto label; \
} while (0)
-#endif
+
+#endif /* !__get_kernel_nofault */
/**
* get_kernel_nofault(): safely attempt to read from a location
@@ -549,7 +586,42 @@ do { \
copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\
})
-#ifndef user_access_begin
+#ifdef user_access_begin
+
+#ifdef arch_unsafe_get_user
+/*
+ * Wrap the architecture implementation so that @label can be outside of a
+ * cleanup() scope. A regular C goto works correctly, but ASM goto does
+ * not. Clang rejects such an attempt, but GCC silently emits buggy code.
+ *
+ * Some architectures use internal local labels already, but this extra
+ * indirection here is harmless because the compiler optimizes it out
+ * completely in any case. This construct just ensures that the ASM GOTO
+ * target is always in the local scope. The C goto 'label' works correctly
+ * when leaving a cleanup() scope.
+ */
+#define unsafe_get_user(x, ptr, label) \
+do { \
+ __label__ local_label; \
+ arch_unsafe_get_user(x, ptr, local_label); \
+ if (0) { \
+ local_label: \
+ goto label; \
+ } \
+} while (0)
+
+#define unsafe_put_user(x, ptr, label) \
+do { \
+ __label__ local_label; \
+ arch_unsafe_put_user(x, ptr, local_label); \
+ if (0) { \
+ local_label: \
+ goto label; \
+ } \
+} while (0)
+#endif /* arch_unsafe_get_user */
+
+#else /* user_access_begin */
#define user_access_begin(ptr,len) access_ok(ptr, len)
#define user_access_end() do { } while (0)
#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
@@ -559,7 +631,8 @@ do { \
#define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e)
static inline unsigned long user_access_save(void) { return 0UL; }
static inline void user_access_restore(unsigned long flags) { }
-#endif
+#endif /* !user_access_begin */
+
#ifndef user_write_access_begin
#define user_write_access_begin user_access_begin
#define user_write_access_end user_access_end
@@ -569,6 +642,239 @@ static inline void user_access_restore(unsigned long flags) { }
#define user_read_access_end user_access_end
#endif
+/* Define RW variant so the below _mode macro expansion works */
+#define masked_user_rw_access_begin(u) masked_user_access_begin(u)
+#define user_rw_access_begin(u, s) user_access_begin(u, s)
+#define user_rw_access_end() user_access_end()
+
+/* Scoped user access */
+#define USER_ACCESS_GUARD(_mode) \
+static __always_inline void __user * \
+class_user_##_mode##_begin(void __user *ptr) \
+{ \
+ return ptr; \
+} \
+ \
+static __always_inline void \
+class_user_##_mode##_end(void __user *ptr) \
+{ \
+ user_##_mode##_access_end(); \
+} \
+ \
+DEFINE_CLASS(user_ ##_mode## _access, void __user *, \
+ class_user_##_mode##_end(_T), \
+ class_user_##_mode##_begin(ptr), void __user *ptr) \
+ \
+static __always_inline class_user_##_mode##_access_t \
+class_user_##_mode##_access_ptr(void __user *scope) \
+{ \
+ return scope; \
+}
+
+USER_ACCESS_GUARD(read)
+USER_ACCESS_GUARD(write)
+USER_ACCESS_GUARD(rw)
+#undef USER_ACCESS_GUARD
+
+/**
+ * __scoped_user_access_begin - Start a scoped user access
+ * @mode: The mode of the access class (read, write, rw)
+ * @uptr: The pointer to access user space memory
+ * @size: Size of the access
+ * @elbl: Error label to goto when the access region is rejected
+ *
+ * Internal helper for __scoped_user_access(). Don't use directly.
+ */
+#define __scoped_user_access_begin(mode, uptr, size, elbl) \
+({ \
+ typeof(uptr) __retptr; \
+ \
+ if (can_do_masked_user_access()) { \
+ __retptr = masked_user_##mode##_access_begin(uptr); \
+ } else { \
+ __retptr = uptr; \
+ if (!user_##mode##_access_begin(uptr, size)) \
+ goto elbl; \
+ } \
+ __retptr; \
+})
+
+/**
+ * __scoped_user_access - Open a scope for user access
+ * @mode: The mode of the access class (read, write, rw)
+ * @uptr: The pointer to access user space memory
+ * @size: Size of the access
+ * @elbl: Error label to goto when the access region is rejected. It
+ * must be placed outside the scope
+ *
+ * If the user access function inside the scope requires a fault label, it
+ * can use @elbl or a different label outside the scope, which requires
+ * that user access which is implemented with ASM GOTO has been properly
+ * wrapped. See unsafe_get_user() for reference.
+ *
+ * scoped_user_rw_access(ptr, efault) {
+ * unsafe_get_user(rval, &ptr->rval, efault);
+ * unsafe_put_user(wval, &ptr->wval, efault);
+ * }
+ * return 0;
+ * efault:
+ * return -EFAULT;
+ *
+ * The scope is internally implemented as an autoterminating nested for()
+ * loop, which can be left with 'return', 'break' and 'goto' at any
+ * point.
+ *
+ * When the scope is left, user_##@mode##_access_end() is automatically
+ * invoked.
+ *
+ * When the architecture supports masked user access and the access region
+ * which is determined by @uptr and @size is not a valid user space
+ * address, i.e. < TASK_SIZE, the scope sets the pointer to a faulting user
+ * space address and does not terminate early. This optimizes for the good
+ * case and lets the performance uncritical bad case go through the fault.
+ *
+ * The eventual modification of the pointer is limited to the scope.
+ * Outside of the scope the original pointer value is unmodified, so that
+ * the original pointer value is available for diagnostic purposes in an
+ * out of scope fault path.
+ *
+ * Nesting scoped user access into a user access scope is invalid and fails
+ * the build. Nesting into other guards, e.g. pagefault is safe.
+ *
+ * The masked variant does not check the size of the access and relies on a
+ * mapping hole (e.g. guard page) to catch an out of range pointer. Therefore
+ * the first access to user memory inside the scope has to be within
+ * @uptr ... @uptr + PAGE_SIZE - 1
+ *
+ * Don't use directly. Use scoped_user_$MODE_access() instead.
+ */
+#define __scoped_user_access(mode, uptr, size, elbl) \
+for (bool done = false; !done; done = true) \
+ for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \
+ !done; done = true) \
+ for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true) \
+ /* Force modified pointer usage within the scope */ \
+ for (const typeof(uptr) uptr = _tmpptr; !done; done = true)
+
+/**
+ * scoped_user_read_access_size - Start a scoped user read access with given size
+ * @usrc: Pointer to the user space address to read from
+ * @size: Size of the access starting from @usrc
+ * @elbl: Error label to goto when the access region is rejected
+ *
+ * For further information see __scoped_user_access() above.
+ */
+#define scoped_user_read_access_size(usrc, size, elbl) \
+ __scoped_user_access(read, usrc, size, elbl)
+
+/**
+ * scoped_user_read_access - Start a scoped user read access
+ * @usrc: Pointer to the user space address to read from
+ * @elbl: Error label to goto when the access region is rejected
+ *
+ * The size of the access starting from @usrc is determined via sizeof(*@usrc).
+ *
+ * For further information see __scoped_user_access() above.
+ */
+#define scoped_user_read_access(usrc, elbl) \
+ scoped_user_read_access_size(usrc, sizeof(*(usrc)), elbl)
+
+/**
+ * scoped_user_write_access_size - Start a scoped user write access with given size
+ * @udst: Pointer to the user space address to write to
+ * @size: Size of the access starting from @udst
+ * @elbl: Error label to goto when the access region is rejected
+ *
+ * For further information see __scoped_user_access() above.
+ */
+#define scoped_user_write_access_size(udst, size, elbl) \
+ __scoped_user_access(write, udst, size, elbl)
+
+/**
+ * scoped_user_write_access - Start a scoped user write access
+ * @udst: Pointer to the user space address to write to
+ * @elbl: Error label to goto when the access region is rejected
+ *
+ * The size of the access starting from @udst is determined via sizeof(*@udst).
+ *
+ * For further information see __scoped_user_access() above.
+ */
+#define scoped_user_write_access(udst, elbl) \
+ scoped_user_write_access_size(udst, sizeof(*(udst)), elbl)
+
+/**
+ * scoped_user_rw_access_size - Start a scoped user read/write access with given size
+ * @uptr: Pointer to the user space address to read from and write to
+ * @size: Size of the access starting from @uptr
+ * @elbl: Error label to goto when the access region is rejected
+ *
+ * For further information see __scoped_user_access() above.
+ */
+#define scoped_user_rw_access_size(uptr, size, elbl) \
+ __scoped_user_access(rw, uptr, size, elbl)
+
+/**
+ * scoped_user_rw_access - Start a scoped user read/write access
+ * @uptr: Pointer to the user space address to read from and write to
+ * @elbl: Error label to goto when the access region is rejected
+ *
+ * The size of the access starting from @uptr is determined via sizeof(*@uptr).
+ *
+ * For further information see __scoped_user_access() above.
+ */
+#define scoped_user_rw_access(uptr, elbl) \
+ scoped_user_rw_access_size(uptr, sizeof(*(uptr)), elbl)
+
+/**
+ * get_user_inline - Read user data inlined
+ * @val: The variable to store the value read from user memory
+ * @usrc: Pointer to the user space memory to read from
+ *
+ * Return: 0 if successful, -EFAULT when faulted
+ *
+ * Inlined variant of get_user(). Only use when there is a demonstrable
+ * performance reason.
+ */
+#define get_user_inline(val, usrc) \
+({ \
+ __label__ efault; \
+ typeof(usrc) _tmpsrc = usrc; \
+ int _ret = 0; \
+ \
+ scoped_user_read_access(_tmpsrc, efault) \
+ unsafe_get_user(val, _tmpsrc, efault); \
+ if (0) { \
+ efault: \
+ _ret = -EFAULT; \
+ } \
+ _ret; \
+})
+
+/**
+ * put_user_inline - Write to user memory inlined
+ * @val: The value to write
+ * @udst: Pointer to the user space memory to write to
+ *
+ * Return: 0 if successful, -EFAULT when faulted
+ *
+ * Inlined variant of put_user(). Only use when there is a demonstrable
+ * performance reason.
+ */
+#define put_user_inline(val, udst) \
+({ \
+ __label__ efault; \
+ typeof(udst) _tmpdst = udst; \
+ int _ret = 0; \
+ \
+ scoped_user_write_access(_tmpdst, efault) \
+ unsafe_put_user(val, _tmpdst, efault); \
+ if (0) { \
+ efault: \
+ _ret = -EFAULT; \
+ } \
+ _ret; \
+})
+
#ifdef CONFIG_HARDENED_USERCOPY
void __noreturn usercopy_abort(const char *name, const char *detail,
bool to_user, unsigned long offset,
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 82904291c2b8..370f8df2fdb4 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -179,7 +179,8 @@ TRACE_EVENT(pstate_sample,
{ PM_EVENT_HIBERNATE, "hibernate" }, \
{ PM_EVENT_THAW, "thaw" }, \
{ PM_EVENT_RESTORE, "restore" }, \
- { PM_EVENT_RECOVER, "recover" })
+ { PM_EVENT_RECOVER, "recover" }, \
+ { PM_EVENT_POWEROFF, "poweroff" })
DEFINE_EVENT(cpu, cpu_frequency,
diff --git a/include/trace/events/rseq.h b/include/trace/events/rseq.h
index 823b47d1ba1e..ce85d650bf4b 100644
--- a/include/trace/events/rseq.h
+++ b/include/trace/events/rseq.h
@@ -21,9 +21,9 @@ TRACE_EVENT(rseq_update,
),
TP_fast_assign(
- __entry->cpu_id = raw_smp_processor_id();
+ __entry->cpu_id = t->rseq.ids.cpu_id;
__entry->node_id = cpu_to_node(__entry->cpu_id);
- __entry->mm_cid = task_mm_cid(t);
+ __entry->mm_cid = t->rseq.ids.mm_cid;
),
TP_printk("cpu_id=%d node_id=%d mm_cid=%d", __entry->cpu_id,
diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h
index 47db5eaf2f9a..61171b13c687 100644
--- a/include/trace/events/timer_migration.h
+++ b/include/trace/events/timer_migration.h
@@ -173,14 +173,14 @@ DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_active,
TP_ARGS(tmc)
);
-DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_online,
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_available,
TP_PROTO(struct tmigr_cpu *tmc),
TP_ARGS(tmc)
);
-DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_offline,
+DEFINE_EVENT(tmigr_cpugroup, tmigr_cpu_unavailable,
TP_PROTO(struct tmigr_cpu *tmc),
diff --git a/include/uapi/linux/energy_model.h b/include/uapi/linux/energy_model.h
new file mode 100644
index 000000000000..4ec4c0eabbbb
--- /dev/null
+++ b/include/uapi/linux/energy_model.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/em.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_ENERGY_MODEL_H
+#define _UAPI_LINUX_ENERGY_MODEL_H
+
+#define EM_FAMILY_NAME "em"
+#define EM_FAMILY_VERSION 1
+
+enum {
+ EM_A_PDS_PD = 1,
+
+ __EM_A_PDS_MAX,
+ EM_A_PDS_MAX = (__EM_A_PDS_MAX - 1)
+};
+
+enum {
+ EM_A_PD_PAD = 1,
+ EM_A_PD_PD_ID,
+ EM_A_PD_FLAGS,
+ EM_A_PD_CPUS,
+
+ __EM_A_PD_MAX,
+ EM_A_PD_MAX = (__EM_A_PD_MAX - 1)
+};
+
+enum {
+ EM_A_PD_TABLE_PD_ID = 1,
+ EM_A_PD_TABLE_PS,
+
+ __EM_A_PD_TABLE_MAX,
+ EM_A_PD_TABLE_MAX = (__EM_A_PD_TABLE_MAX - 1)
+};
+
+enum {
+ EM_A_PS_PAD = 1,
+ EM_A_PS_PERFORMANCE,
+ EM_A_PS_FREQUENCY,
+ EM_A_PS_POWER,
+ EM_A_PS_COST,
+ EM_A_PS_FLAGS,
+
+ __EM_A_PS_MAX,
+ EM_A_PS_MAX = (__EM_A_PS_MAX - 1)
+};
+
+enum {
+ EM_CMD_GET_PDS = 1,
+ EM_CMD_GET_PD_TABLE,
+ EM_CMD_PD_CREATED,
+ EM_CMD_PD_UPDATED,
+ EM_CMD_PD_DELETED,
+
+ __EM_CMD_MAX,
+ EM_CMD_MAX = (__EM_CMD_MAX - 1)
+};
+
+#define EM_MCGRP_EVENT "event"
+
+#endif /* _UAPI_LINUX_ENERGY_MODEL_H */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d292f96bc06f..c44a8fb3e418 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -382,6 +382,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
+#define PERF_ATTR_SIZE_VER9 144 /* Add: config4 */
/*
* 'struct perf_event_attr' contains various attributes that define
@@ -545,6 +546,7 @@ struct perf_event_attr {
__u64 sig_data;
__u64 config3; /* extension of config2 */
+ __u64 config4; /* extension of config3 */
};
/*
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index c233aae5eac9..1b76d508400c 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -114,20 +114,13 @@ struct rseq {
/*
* Restartable sequences flags field.
*
- * This field should only be updated by the thread which
- * registered this data structure. Read by the kernel.
- * Mainly used for single-stepping through rseq critical sections
- * with debuggers.
- *
- * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
- * Inhibit instruction sequence block restart on preemption
- * for this thread.
- * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
- * Inhibit instruction sequence block restart on signal
- * delivery for this thread.
- * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
- * Inhibit instruction sequence block restart on migration for
- * this thread.
+ * This field was initially intended to allow event masking for
+ * single-stepping through rseq critical sections with debuggers.
+ * The kernel does not support this anymore and the relevant bits
+ * are checked for being always false:
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
*/
__u32 flags;