diff options
Diffstat (limited to 'tools/include')
84 files changed, 4092 insertions, 888 deletions
diff --git a/tools/include/asm/timex.h b/tools/include/asm/timex.h new file mode 100644 index 000000000000..5adfe3c6d326 --- /dev/null +++ b/tools/include/asm/timex.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_LINUX_ASM_TIMEX_H +#define __TOOLS_LINUX_ASM_TIMEX_H + +#include <time.h> + +#define cycles_t clock_t + +static inline cycles_t get_cycles(void) +{ + return clock(); +} +#endif // __TOOLS_LINUX_ASM_TIMEX_H diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 2a7f260ef9dc..d4d300040d01 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -19,6 +19,7 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); @@ -79,6 +80,11 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } +static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused) +{ + return malloc(bitmap_size(nbits)); +} + /** * bitmap_zalloc - Allocate bitmap * @nbits: Number of bits @@ -150,6 +156,21 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __set_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map |= GENMASK(start + nbits - 1, start); + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + 
IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) + memset((char *)map + start / 8, 0xff, nbits / 8); + else + __bitmap_set(map, start, nbits); +} + static inline void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits) { diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 60044b608817..8de2914e6510 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -41,7 +41,7 @@ * Missing asm support * * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __init128' data + * in the asm code, as it shifts an 'unsigned __int128' data * type instead of direct representation of 128 bit constants * such as long and unsigned long. The fundamental problem is * that a 128 bit constant will get silently truncated by the diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 000000000000..6b8713675765 --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) 
\ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/include/linux/container_of.h b/tools/include/linux/container_of.h new file mode 100644 index 000000000000..c879e14c3dd6 --- /dev/null +++ b/tools/include/linux/container_of.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_CONTAINER_OF_H +#define _TOOLS_LINUX_CONTAINER_OF_H + +#ifndef container_of +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#endif /* _TOOLS_LINUX_CONTAINER_OF_H */ diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 07cfad817d53..c8c18d3908a9 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -11,6 +11,7 @@ #include <linux/panic.h> #include <endian.h> #include <byteswap.h> +#include <linux/container_of.h> #ifndef UINT_MAX #define UINT_MAX (~0U) @@ -25,19 +26,6 @@ #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif -#ifndef container_of -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. 
- * - */ -#define container_of(ptr, type, member) ({ \ - const typeof(((type *)0)->member) * __mptr = (ptr); \ - (type *)((char *)__mptr - offsetof(type, member)); }) -#endif - #ifndef max #define max(x, y) ({ \ typeof(x) _max1 = (x); \ diff --git a/tools/include/linux/math64.h b/tools/include/linux/math64.h index 4ad45d5943dc..8a67d478bf19 100644 --- a/tools/include/linux/math64.h +++ b/tools/include/linux/math64.h @@ -72,4 +72,9 @@ static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) } #endif +static inline u64 div_u64(u64 dividend, u32 divisor) +{ + return dividend / divisor; +} + #endif /* _LINUX_MATH64_H */ diff --git a/tools/include/linux/moduleparam.h b/tools/include/linux/moduleparam.h new file mode 100644 index 000000000000..4c4d05bef0cb --- /dev/null +++ b/tools/include/linux/moduleparam.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_MODULE_PARAMS_H +#define _TOOLS_LINUX_MODULE_PARAMS_H + +#define MODULE_PARM_DESC(parm, desc) + +#endif // _TOOLS_LINUX_MODULE_PARAMS_H diff --git a/tools/include/linux/prandom.h b/tools/include/linux/prandom.h new file mode 100644 index 000000000000..b745041ccd6a --- /dev/null +++ b/tools/include/linux/prandom.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOLS_LINUX_PRANDOM_H +#define __TOOLS_LINUX_PRANDOM_H + +#include <linux/types.h> + +struct rnd_state { + __u32 s1, s2, s3, s4; +}; + +/* + * Handle minimum values for seeds + */ +static inline u32 __seed(u32 x, u32 m) +{ + return (x < m) ? x + m : x; +} + +/** + * prandom_seed_state - set seed for prandom_u32_state(). + * @state: pointer to state structure to receive the seed. + * @seed: arbitrary 64-bit value to use as a seed. 
+ */ +static inline void prandom_seed_state(struct rnd_state *state, u64 seed) +{ + u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL; + + state->s1 = __seed(i, 2U); + state->s2 = __seed(i, 8U); + state->s3 = __seed(i, 16U); + state->s4 = __seed(i, 128U); +} + +/** + * prandom_u32_state - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use get_random_u32(). + */ +static inline u32 prandom_u32_state(struct rnd_state *state) +{ +#define TAUSWORTHE(s, a, b, c, d) (((s & c) << d) ^ (((s << a) ^ s) >> b)) + state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U); + state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U); + state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U); + state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U); + + return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4); +} +#endif // __TOOLS_LINUX_PRANDOM_H diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h index 36cb29bc57c2..1f30956e070d 100644 --- a/tools/include/linux/refcount.h +++ b/tools/include/linux/refcount.h @@ -60,6 +60,11 @@ static inline void refcount_set(refcount_t *r, unsigned int n) atomic_set(&r->refs, n); } +static inline void refcount_set_release(refcount_t *r, unsigned int n) +{ + atomic_set(&r->refs, n); +} + static inline unsigned int refcount_read(const refcount_t *r) { return atomic_read(&r->refs); diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index 51b25e9c4ec7..c87051e2b26f 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -12,6 +12,7 @@ void *kmalloc(size_t size, gfp_t gfp); void kfree(void *p); +void *kmalloc_array(size_t n, size_t size, gfp_t gfp); bool slab_is_available(void); diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index 8519386acd23..4928e33d44ac 100644 --- 
a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -42,6 +42,8 @@ typedef __s16 s16; typedef __u8 u8; typedef __s8 s8; +typedef unsigned long long ullong; + #ifdef __CHECKER__ #define __bitwise __attribute__((bitwise)) #else diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index a1f55fb24bb3..c335ce0bd195 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -29,17 +29,42 @@ all_files := \ compiler.h \ crt.h \ ctype.h \ + dirent.h \ + elf.h \ errno.h \ + fcntl.h \ + getopt.h \ + limits.h \ + math.h \ nolibc.h \ + poll.h \ + sched.h \ signal.h \ stackprotector.h \ std.h \ stdarg.h \ stdbool.h \ + stddef.h \ stdint.h \ stdlib.h \ string.h \ sys.h \ + sys/auxv.h \ + sys/ioctl.h \ + sys/mman.h \ + sys/mount.h \ + sys/prctl.h \ + sys/random.h \ + sys/reboot.h \ + sys/resource.h \ + sys/stat.h \ + sys/syscall.h \ + sys/sysmacros.h \ + sys/time.h \ + sys/timerfd.h \ + sys/types.h \ + sys/utsname.h \ + sys/wait.h \ time.h \ types.h \ unistd.h \ @@ -70,7 +95,7 @@ help: headers: $(Q)mkdir -p $(OUTPUT)sysroot $(Q)mkdir -p $(OUTPUT)sysroot/include - $(Q)cp $(all_files) $(OUTPUT)sysroot/include/ + $(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/ $(Q)if [ "$(ARCH)" = "x86" ]; then \ sed -e \ 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \ @@ -89,5 +114,14 @@ headers_standalone: headers $(Q)$(MAKE) -C $(srctree) headers $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot +# GCC uses "s390", clang "systemz" +CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) + +headers_check: headers_standalone + for header in $(filter-out crt.h std.h,$(all_files)); do \ + $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ + -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \ + done + clean: $(call QUIET_CLEAN, nolibc) rm -rf 
"$(OUTPUT)sysroot" diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h index 06fdef7b291a..937a348da42e 100644 --- a/tools/include/nolibc/arch-aarch64.h +++ b/tools/include/nolibc/arch-aarch64.h @@ -146,7 +146,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s { __asm__ volatile ( "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ - "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h index 6180ff99ab43..1f66e7e5a444 100644 --- a/tools/include/nolibc/arch-arm.h +++ b/tools/include/nolibc/arch-arm.h @@ -189,8 +189,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s { __asm__ volatile ( "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ - "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */ - "mov sp, ip\n" "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h index ff5afc35bbd8..7c9b38e96418 100644 --- a/tools/include/nolibc/arch-i386.h +++ b/tools/include/nolibc/arch-i386.h @@ -167,8 +167,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */ - "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */ "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ "push %eax\n" /* push arg1 on stack to support plain stack modes too */ "call _start_c\n" /* transfer to c runtime */ diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h index 
fb519545959e..5511705303ea 100644 --- a/tools/include/nolibc/arch-loongarch.h +++ b/tools/include/nolibc/arch-loongarch.h @@ -142,18 +142,11 @@ _arg1; \ }) -#if __loongarch_grlen == 32 -#define LONG_BSTRINS "bstrins.w" -#else /* __loongarch_grlen == 64 */ -#define LONG_BSTRINS "bstrins.d" -#endif - /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h new file mode 100644 index 000000000000..6dac1845f298 --- /dev/null +++ b/tools/include/nolibc/arch-m68k.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * m68k specific definitions for NOLIBC + * Copyright (C) 2025 Daniel Palmer<daniel@thingy.jp> + * + * Roughly based on one or more of the other arch files. 
+ * + */ + +#ifndef _NOLIBC_ARCH_M68K_H +#define _NOLIBC_ARCH_M68K_H + +#include "compiler.h" +#include "crt.h" + +#define _NOLIBC_SYSCALL_CLOBBERLIST "memory" + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long 
_num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + register long _arg6 __asm__ ("a0") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +void _start(void); +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + "movel %sp, %sp@-\n" + "jsr _start_c\n" + ); + __nolibc_entrypoint_epilogue(); +} + +#endif /* _NOLIBC_ARCH_M68K_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 1791a8ce58da..753a8ed2cf69 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -179,6 +179,7 @@ }) /* startup code, note that it's called __start on MIPS */ +void __start(void); void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) { __asm__ volatile ( diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h index ee2fdb8d601d..204564bbcd32 100644 --- a/tools/include/nolibc/arch-powerpc.h +++ 
b/tools/include/nolibc/arch-powerpc.h @@ -201,7 +201,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stdu 1, -32(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ @@ -209,7 +208,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s #else __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stwu 1, -16(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 8827bf936212..885383a86c38 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -148,7 +148,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s "lla gp, __global_pointer$\n" ".option pop\n" "mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */ - "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ "call _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index f9ab83a219b8..df4c3cc713ac 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -5,8 +5,8 @@ #ifndef _NOLIBC_ARCH_S390_H #define _NOLIBC_ARCH_S390_H -#include <asm/signal.h> -#include <asm/unistd.h> +#include <linux/signal.h> +#include <linux/unistd.h> #include "compiler.h" #include "crt.h" @@ -143,8 +143,13 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( +#ifdef __s390x__ "lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */ "aghi %r15, -160\n" 
/* allocate new stackframe */ +#else + "lr %r2, %r15\n" + "ahi %r15, -96\n" +#endif "xc 0(8,%r15), 0(%r15)\n" /* clear backchain */ "brasl %r14, _start_c\n" /* transfer to c runtime */ ); diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h new file mode 100644 index 000000000000..1435172f3dfe --- /dev/null +++ b/tools/include/nolibc/arch-sparc.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SPARC (32bit and 64bit) specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SPARC_H +#define _NOLIBC_ARCH_SPARC_H + +#include <linux/unistd.h> + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SPARC: + * - registers are native word size + * - syscall number is passed in g1 + * - arguments are in o0-o5 + * - the system call is performed by calling a trap instruction + * - syscall return value is in o0 + * - syscall error flag is in the carry bit of the processor status register + */ + +#ifdef __arch64__ + +#define _NOLIBC_SYSCALL "t 0x6d\n" \ + "bcs,a %%xcc, 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#else + +#define _NOLIBC_SYSCALL "t 0x10\n" \ + "bcs,a 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#endif /* __arch64__ */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0"); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); 
\ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + 
register long _arg6 __asm__ ("o5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + /* + * Save argc pointer to o0, as arg1 of _start_c. + * Account for the window save area, which is 16 registers wide. + */ +#ifdef __arch64__ + "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */ +#else + "add %sp, 64, %o0\n" +#endif + "b,a _start_c\n" /* transfer to c runtime */ + ); + __nolibc_entrypoint_epilogue(); +} + +static pid_t getpid(void); + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_fork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_fork sys_fork + +#endif /* _NOLIBC_ARCH_SPARC_H */ diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h index 1e40620a2b33..67305e24dbef 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86_64.h @@ -166,7 +166,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */ - "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ ); diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index c8f4e5d3add9..d20b2304aac2 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -29,10 +29,14 @@ #include "arch-powerpc.h" #elif defined(__riscv) #include "arch-riscv.h" -#elif defined(__s390x__) +#elif 
defined(__s390x__) || defined(__s390__) #include "arch-s390.h" #elif defined(__loongarch__) #include "arch-loongarch.h" +#elif defined(__sparc__) +#include "arch-sparc.h" +#elif defined(__m68k__) +#include "arch-m68k.h" #else #error Unsupported Architecture #endif diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index fa1f547e7f13..369cfb5a0e78 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -12,6 +12,15 @@ # define __nolibc_has_attribute(attr) 0 #endif +#if defined(__has_feature) +# define __nolibc_has_feature(feature) __has_feature(feature) +#else +# define __nolibc_has_feature(feature) 0 +#endif + +#define __nolibc_aligned(alignment) __attribute__((aligned(alignment))) +#define __nolibc_aligned_as(type) __nolibc_aligned(__alignof__(type)) + #if __nolibc_has_attribute(naked) # define __nolibc_entrypoint __attribute__((naked)) # define __nolibc_entrypoint_epilogue() diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index bbcd5fd09806..961cfe777c35 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -7,9 +7,12 @@ #ifndef _NOLIBC_CRT_H #define _NOLIBC_CRT_H +#include "compiler.h" + char **environ __attribute__((weak)); const unsigned long *_auxv __attribute__((weak)); +void _start(void); static void __stack_chk_init(void); static void exit(int); @@ -22,7 +25,11 @@ extern void (*const __init_array_end[])(int, char **, char**) __attribute__((wea extern void (*const __fini_array_start[])(void) __attribute__((weak)); extern void (*const __fini_array_end[])(void) __attribute__((weak)); +void _start_c(long *sp); __attribute__((weak,used)) +#if __nolibc_has_feature(undefined_behavior_sanitizer) + __attribute__((no_sanitize("function"))) +#endif void _start_c(long *sp) { long argc; diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index 6f90706d0644..470fdf34394a 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ 
-4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_CTYPE_H #define _NOLIBC_CTYPE_H @@ -96,7 +99,4 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h new file mode 100644 index 000000000000..758b95c48e7a --- /dev/null +++ b/tools/include/nolibc/dirent.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Directory access for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_DIRENT_H +#define _NOLIBC_DIRENT_H + +#include "compiler.h" +#include "stdint.h" +#include "types.h" +#include "fcntl.h" + +#include <linux/limits.h> + +struct dirent { + ino_t d_ino; + char d_name[NAME_MAX + 1]; +}; + +/* See comment of FILE in stdio.h */ +typedef struct { + char dummy[1]; +} DIR; + +static __attribute__((unused)) +DIR *fdopendir(int fd) +{ + if (fd < 0) { + SET_ERRNO(EBADF); + return NULL; + } + return (DIR *)(intptr_t)~fd; +} + +static __attribute__((unused)) +DIR *opendir(const char *name) +{ + int fd; + + fd = open(name, O_RDONLY); + if (fd == -1) + return NULL; + return fdopendir(fd); +} + +static __attribute__((unused)) +int closedir(DIR *dirp) +{ + intptr_t i = (intptr_t)dirp; + + if (i >= 0) { + SET_ERRNO(EBADF); + return -1; + } + return close(~i); +} + +static __attribute__((unused)) +int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) +{ + char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1] __nolibc_aligned_as(struct linux_dirent64); + struct linux_dirent64 *ldir = (void *)buf; + intptr_t i = (intptr_t)dirp; + int fd, ret; + + if (i >= 0) + return EBADF; + + fd = ~i; + + ret = sys_getdents64(fd, ldir, sizeof(buf)); + if (ret < 0) + 
return -ret; + if (ret == 0) { + *result = NULL; + return 0; + } + + /* + * getdents64() returns as many entries as fit the buffer. + * readdir() can only return one entry at a time. + * Make sure the non-returned ones are not skipped. + */ + ret = lseek(fd, ldir->d_off, SEEK_SET); + if (ret == -1) + return errno; + + entry->d_ino = ldir->d_ino; + /* the destination should always be big enough */ + strlcpy(entry->d_name, ldir->d_name, sizeof(entry->d_name)); + *result = entry; + return 0; +} + +#endif /* _NOLIBC_DIRENT_H */ diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h new file mode 100644 index 000000000000..3e2c5228bf3d --- /dev/null +++ b/tools/include/nolibc/elf.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim elf.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_ELF_H +#define _NOLIBC_SYS_ELF_H + +#include <linux/elf.h> + +#endif /* _NOLIBC_SYS_ELF_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index a44486ff0477..08a33c40ec0c 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -4,10 +4,13 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_ERRNO_H #define _NOLIBC_ERRNO_H -#include <asm/errno.h> +#include <linux/errno.h> #ifndef NOLIBC_IGNORE_ERRNO #define SET_ERRNO(v) do { errno = (v); } while (0) @@ -22,7 +25,4 @@ int errno __attribute__((weak)); */ #define MAX_ERRNO 4095 -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h new file mode 100644 index 000000000000..bff2e542f20f --- /dev/null +++ b/tools/include/nolibc/fcntl.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * 
fcntl definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_FCNTL_H +#define _NOLIBC_FCNTL_H + +#include "arch.h" +#include "types.h" +#include "sys.h" + +/* + * int openat(int dirfd, const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_openat(int dirfd, const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, dirfd, path, flags, mode); +} + +static __attribute__((unused)) +int openat(int dirfd, const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_openat(dirfd, path, flags, mode)); +} + +/* + * int open(const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +} + +static __attribute__((unused)) +int open(const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_open(path, flags, mode)); +} + +#endif /* _NOLIBC_FCNTL_H */ diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h new file mode 100644 index 000000000000..217abb95264b --- /dev/null +++ b/tools/include/nolibc/getopt.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * getopt function definitions for NOLIBC, adapted from musl libc + * Copyright (C) 2005-2020 Rich Felker, et al. 
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_GETOPT_H +#define _NOLIBC_GETOPT_H + +struct FILE; +static struct FILE *const stderr; +static int fprintf(struct FILE *stream, const char *fmt, ...); + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +char *optarg; + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +int optind = 1, opterr = 1, optopt; + +static __attribute__((unused)) +int getopt(int argc, char * const argv[], const char *optstring) +{ + static int __optpos; + int i; + char c, d; + char *optchar; + + if (!optind) { + __optpos = 0; + optind = 1; + } + + if (optind >= argc || !argv[optind]) + return -1; + + if (argv[optind][0] != '-') { + if (optstring[0] == '-') { + optarg = argv[optind++]; + return 1; + } + return -1; + } + + if (!argv[optind][1]) + return -1; + + if (argv[optind][1] == '-' && !argv[optind][2]) + return optind++, -1; + + if (!__optpos) + __optpos++; + c = argv[optind][__optpos]; + optchar = argv[optind] + __optpos; + __optpos++; + + if (!argv[optind][__optpos]) { + optind++; + __optpos = 0; + } + + if (optstring[0] == '-' || optstring[0] == '+') + optstring++; + + i = 0; + d = 0; + do { + d = optstring[i++]; + } while (d && d != c); + + if (d != c || c == ':') { + optopt = c; + if (optstring[0] != ':' && opterr) + fprintf(stderr, "%s: unrecognized option: %c\n", argv[0], *optchar); + return '?'; + } + if (optstring[i] == ':') { + optarg = 0; + if (optstring[i + 1] != ':' || __optpos) { + optarg = argv[optind++]; + if (__optpos) + optarg += __optpos; + __optpos = 0; + } + if (optind > argc) { + optopt = c; + if (optstring[0] == ':') + return ':'; + if (opterr) + fprintf(stderr, "%s: option requires argument: %c\n", + argv[0], *optchar); + return '?'; + } + } + return c; +} + +#endif /* _NOLIBC_GETOPT_H */ diff --git a/tools/include/nolibc/limits.h b/tools/include/nolibc/limits.h new file mode 100644 
index 000000000000..306d4141f4d2 --- /dev/null +++ b/tools/include/nolibc/limits.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim limits.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +#include "nolibc.h" diff --git a/tools/include/nolibc/math.h b/tools/include/nolibc/math.h new file mode 100644 index 000000000000..9df823ddd412 --- /dev/null +++ b/tools/include/nolibc/math.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * math definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_MATH_H +#define _NOLIBC_SYS_MATH_H + +static __inline__ +double fabs(double x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +float fabsf(float x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +long double fabsl(long double x) +{ + return x >= 0 ? x : -x; +} + +#endif /* _NOLIBC_SYS_MATH_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 92436b1e4441..c199ade200c2 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -31,8 +31,7 @@ * - The third level is the libc call definition. It exposes the lower raw * sys_<name>() calls in a way that looks like what a libc usually does, * takes care of specific input values, and of setting errno upon error. - * There can be minor variations compared to standard libc calls. For - * example the open() call always takes 3 args here. + * There can be minor variations compared to standard libc calls. * * The errno variable is declared static and unused. This way it can be * optimized away if not used. 
However this means that a program made of @@ -97,7 +96,24 @@ #include "arch.h" #include "types.h" #include "sys.h" +#include "sys/auxv.h" +#include "sys/ioctl.h" +#include "sys/mman.h" +#include "sys/mount.h" +#include "sys/prctl.h" +#include "sys/random.h" +#include "sys/reboot.h" +#include "sys/resource.h" +#include "sys/stat.h" +#include "sys/syscall.h" +#include "sys/sysmacros.h" +#include "sys/time.h" +#include "sys/timerfd.h" +#include "sys/utsname.h" +#include "sys/wait.h" #include "ctype.h" +#include "elf.h" +#include "sched.h" #include "signal.h" #include "unistd.h" #include "stdio.h" @@ -105,6 +121,11 @@ #include "string.h" #include "time.h" #include "stackprotector.h" +#include "dirent.h" +#include "fcntl.h" +#include "getopt.h" +#include "poll.h" +#include "math.h" /* Used by programs to avoid std includes */ #define NOLIBC diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h new file mode 100644 index 000000000000..1765acb17ea0 --- /dev/null +++ b/tools/include/nolibc/poll.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * poll definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_POLL_H +#define _NOLIBC_POLL_H + +#include "arch.h" +#include "sys.h" + +#include <linux/poll.h> +#include <linux/time.h> + +/* + * int poll(struct pollfd *fds, int nfds, int timeout); + */ + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ +#if defined(__NR_ppoll) + struct timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? 
&t : NULL, NULL, 0); +#elif defined(__NR_ppoll_time64) + struct __kernel_timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#elif defined(__NR_poll) + return my_syscall3(__NR_poll, fds, nfds, timeout); +#else + return __nolibc_enosys(__func__, fds, nfds, timeout); +#endif +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + return __sysret(sys_poll(fds, nfds, timeout)); +} + +#endif /* _NOLIBC_POLL_H */ diff --git a/tools/include/nolibc/sched.h b/tools/include/nolibc/sched.h new file mode 100644 index 000000000000..32221562c166 --- /dev/null +++ b/tools/include/nolibc/sched.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sched function definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SCHED_H +#define _NOLIBC_SCHED_H + +#include "sys.h" + +#include <linux/sched.h> + +/* + * int setns(int fd, int nstype); + */ + +static __attribute__((unused)) +int sys_setns(int fd, int nstype) +{ + return my_syscall2(__NR_setns, fd, nstype); +} + +static __attribute__((unused)) +int setns(int fd, int nstype) +{ + return __sysret(sys_setns(fd, nstype)); +} + + +/* + * int unshare(int flags); + */ + +static __attribute__((unused)) +int sys_unshare(int flags) +{ + return my_syscall1(__NR_unshare, flags); +} + +static __attribute__((unused)) +int unshare(int flags) +{ + return __sysret(sys_unshare(flags)); +} + +#endif /* _NOLIBC_SCHED_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index 137552216e46..ac13e53ac31d 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ 
+#include "nolibc.h" + #ifndef _NOLIBC_SIGNAL_H #define _NOLIBC_SIGNAL_H @@ -13,13 +16,11 @@ #include "sys.h" /* This one is not marked static as it's needed by libgcc for divide by zero */ +int raise(int signal); __attribute__((weak,unused,section(".text.nolibc_raise"))) int raise(int signal) { return sys_kill(sys_getpid(), signal); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h index 1d0d5259ec41..c71a2c257177 100644 --- a/tools/include/nolibc/stackprotector.h +++ b/tools/include/nolibc/stackprotector.h @@ -18,6 +18,7 @@ * triggering stack protector errors themselves */ +void __stack_chk_fail(void); __attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk"))) void __stack_chk_fail(void) { @@ -28,6 +29,7 @@ void __stack_chk_fail(void) for (;;); } +void __stack_chk_fail_local(void); __attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) void __stack_chk_fail_local(void) { diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index 933bc0be7e1c..adda7333d12e 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -13,12 +13,8 @@ * syscall-specific stuff, as this file is expected to be included very early. 
*/ -/* note: may already be defined */ -#ifndef NULL -#define NULL ((void *)0) -#endif - #include "stdint.h" +#include "stddef.h" /* those are commonly provided by sys/types.h */ typedef unsigned int dev_t; diff --git a/tools/include/nolibc/stddef.h b/tools/include/nolibc/stddef.h new file mode 100644 index 000000000000..ecbd13eab1f5 --- /dev/null +++ b/tools/include/nolibc/stddef.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Stddef definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_STDDEF_H +#define _NOLIBC_STDDEF_H + +#include "stdint.h" + +/* note: may already be defined */ +#ifndef NULL +#define NULL ((void *)0) +#endif + +#ifndef offsetof +#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) +#endif + +#endif /* _NOLIBC_STDDEF_H */ diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h index cd79ddd6170e..b052ad6303c3 100644 --- a/tools/include/nolibc/stdint.h +++ b/tools/include/nolibc/stdint.h @@ -39,8 +39,8 @@ typedef size_t uint_fast32_t; typedef int64_t int_fast64_t; typedef uint64_t uint_fast64_t; -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; /* limits of integral types */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 3892034198dd..c470d334ef3f 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -4,12 +4,16 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDIO_H #define _NOLIBC_STDIO_H #include "std.h" #include "arch.h" #include "errno.h" +#include "fcntl.h" #include "types.h" #include "sys.h" #include "stdarg.h" @@ -17,6 +21,8 @@ #include "string.h" #include "compiler.h" +static const char *strerror(int errnum); + #ifndef EOF #define EOF (-1) 
#endif @@ -50,6 +56,32 @@ FILE *fdopen(int fd, const char *mode __attribute__((unused))) return (FILE*)(intptr_t)~fd; } +static __attribute__((unused)) +FILE *fopen(const char *pathname, const char *mode) +{ + int flags, fd; + + switch (*mode) { + case 'r': + flags = O_RDONLY; + break; + case 'w': + flags = O_WRONLY | O_CREAT | O_TRUNC; + break; + case 'a': + flags = O_WRONLY | O_CREAT | O_APPEND; + break; + default: + SET_ERRNO(EINVAL); return NULL; + } + + if (mode[1] == '+') + flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR; + + fd = open(pathname, flags, 0666); + return fdopen(fd, mode); +} + /* provides the fd of stream. */ static __attribute__((unused)) int fileno(FILE *stream) @@ -208,28 +240,40 @@ char *fgets(char *s, int size, FILE *stream) } -/* minimal vfprintf(). It supports the following formats: +/* minimal printf(). It supports the following formats: * - %[l*]{d,u,c,x,p} * - %s * - unknown modifiers are ignored. */ -static __attribute__((unused, format(printf, 2, 0))) -int vfprintf(FILE *stream, const char *fmt, va_list args) +typedef int (*__nolibc_printf_cb)(intptr_t state, const char *buf, size_t size); + +static __attribute__((unused, format(printf, 4, 0))) +int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char *fmt, va_list args) { char escape, lpref, c; unsigned long long v; - unsigned int written; - size_t len, ofs; + unsigned int written, width; + size_t len, ofs, w; char tmpbuf[21]; const char *outstr; written = ofs = escape = lpref = 0; while (1) { c = fmt[ofs++]; + width = 0; if (escape) { /* we're in an escape sequence, ofs == 1 */ escape = 0; + + /* width */ + while (c >= '0' && c <= '9') { + width *= 10; + width += c - '0'; + + c = fmt[ofs++]; + } + if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') { char *out = tmpbuf; @@ -277,6 +321,11 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) if (!outstr) outstr="(null)"; } +#ifndef NOLIBC_IGNORE_ERRNO + else if (c == 'm') { + outstr = 
strerror(errno); + } +#endif /* NOLIBC_IGNORE_ERRNO */ else if (c == '%') { /* queue it verbatim */ continue; @@ -286,6 +335,8 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) if (c == 'l') { /* long format prefix, maintain the escape */ lpref++; + } else if (c == 'j') { + lpref = 2; } escape = 1; goto do_escape; @@ -302,8 +353,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) outstr = fmt; len = ofs - 1; flush_str: - if (_fwrite(outstr, len, stream) != 0) - break; + if (n) { + w = len < n ? len : n; + n -= w; + while (width-- > w) { + if (cb(state, " ", 1) != 0) + break; + written += 1; + } + if (cb(state, outstr, w) != 0) + break; + } written += len; do_escape: @@ -319,6 +379,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) return written; } +static int __nolibc_fprintf_cb(intptr_t state, const char *buf, size_t size) +{ + return _fwrite(buf, size, (FILE *)state); +} + +static __attribute__((unused, format(printf, 2, 0))) +int vfprintf(FILE *stream, const char *fmt, va_list args) +{ + return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, SIZE_MAX, fmt, args); +} + static __attribute__((unused, format(printf, 1, 0))) int vprintf(const char *fmt, va_list args) { @@ -349,6 +420,183 @@ int printf(const char *fmt, ...) return ret; } +static __attribute__((unused, format(printf, 2, 0))) +int vdprintf(int fd, const char *fmt, va_list args) +{ + FILE *stream; + + stream = fdopen(fd, NULL); + if (!stream) + return -1; + /* Technically 'stream' is leaked, but as it's only a wrapper around 'fd' that is fine */ + return vfprintf(stream, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int dprintf(int fd, const char *fmt, ...) 
+{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vdprintf(fd, fmt, args); + va_end(args); + + return ret; +} + +static int __nolibc_sprintf_cb(intptr_t _state, const char *buf, size_t size) +{ + char **state = (char **)_state; + + memcpy(*state, buf, size); + *state += size; + return 0; +} + +static __attribute__((unused, format(printf, 3, 0))) +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + char *state = buf; + int ret; + + ret = __nolibc_printf(__nolibc_sprintf_cb, (intptr_t)&state, size, fmt, args); + if (ret < 0) + return ret; + buf[(size_t)ret < size ? (size_t)ret : size - 1] = '\0'; + return ret; +} + +static __attribute__((unused, format(printf, 3, 4))) +int snprintf(char *buf, size_t size, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsnprintf(buf, size, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused, format(printf, 2, 0))) +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, SIZE_MAX, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int sprintf(char *buf, const char *fmt, ...) 
+{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsprintf(buf, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused)) +int vsscanf(const char *str, const char *format, va_list args) +{ + uintmax_t uval; + intmax_t ival; + int base; + char *endptr; + int matches; + int lpref; + + matches = 0; + + while (1) { + if (*format == '%') { + /* start of pattern */ + lpref = 0; + format++; + + if (*format == 'l') { + /* same as in printf() */ + lpref = 1; + format++; + if (*format == 'l') { + lpref = 2; + format++; + } + } + + if (*format == '%') { + /* literal % */ + if ('%' != *str) + goto done; + str++; + format++; + continue; + } else if (*format == 'd') { + ival = strtoll(str, &endptr, 10); + if (lpref == 0) + *va_arg(args, int *) = ival; + else if (lpref == 1) + *va_arg(args, long *) = ival; + else if (lpref == 2) + *va_arg(args, long long *) = ival; + } else if (*format == 'u' || *format == 'x' || *format == 'X') { + base = *format == 'u' ? 10 : 16; + uval = strtoull(str, &endptr, base); + if (lpref == 0) + *va_arg(args, unsigned int *) = uval; + else if (lpref == 1) + *va_arg(args, unsigned long *) = uval; + else if (lpref == 2) + *va_arg(args, unsigned long long *) = uval; + } else if (*format == 'p') { + *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16); + } else { + SET_ERRNO(EILSEQ); + goto done; + } + + format++; + str = endptr; + matches++; + + } else if (*format == '\0') { + goto done; + } else if (isspace(*format)) { + /* skip spaces in format and str */ + while (isspace(*format)) + format++; + while (isspace(*str)) + str++; + } else if (*format == *str) { + /* literal match */ + format++; + str++; + } else { + if (!matches) + matches = EOF; + goto done; + } + } + +done: + return matches; +} + +static __attribute__((unused, format(scanf, 2, 3))) +int sscanf(const char *str, const char *format, ...) 
+{ + va_list args; + int ret; + + va_start(args, format); + ret = vsscanf(str, format, args); + va_end(args); + return ret; +} + static __attribute__((unused)) void perror(const char *msg) { @@ -387,7 +635,4 @@ const char *strerror(int errno) return buf; } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 75aa273c23a6..5fd99a480f82 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDLIB_H #define _NOLIBC_STDLIB_H @@ -29,7 +32,26 @@ static __attribute__((unused)) char itoa_buffer[21]; * As much as possible, please keep functions alphabetically sorted. */ +static __inline__ +int abs(int j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long labs(long j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long long llabs(long long j) +{ + return j >= 0 ? 
j : -j; +} + /* must be exported, as it's used by libgcc for various divide functions */ +void abort(void); __attribute__((weak,unused,noreturn,section(".text.nolibc_abort"))) void abort(void) { @@ -102,32 +124,6 @@ char *getenv(const char *name) } static __attribute__((unused)) -unsigned long getauxval(unsigned long type) -{ - const unsigned long *auxv = _auxv; - unsigned long ret; - - if (!auxv) - return 0; - - while (1) { - if (!auxv[0] && !auxv[1]) { - ret = 0; - break; - } - - if (auxv[0] == type) { - ret = auxv[1]; - break; - } - - auxv += 2; - } - - return ret; -} - -static __attribute__((unused)) void *malloc(size_t len) { struct nolibc_heap *heap; @@ -274,7 +270,7 @@ int itoa_r(long in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(unsigned long)in; *(ptr++) = '-'; len++; } @@ -410,7 +406,7 @@ int i64toa_r(int64_t in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(uint64_t)in; *(ptr++) = '-'; len++; } @@ -547,7 +543,4 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base) return __strtox(nptr, endptr, base, 0, UINTMAX_MAX); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index 9ec9c24f38c0..163a17e7dd38 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STRING_H #define _NOLIBC_STRING_H @@ -32,6 +35,7 @@ int memcmp(const void *s1, const void *s2, size_t n) /* might be ignored by the compiler without -ffreestanding, then found as * missing. 
*/ +void *memmove(void *dst, const void *src, size_t len); __attribute__((weak,unused,section(".text.nolibc_memmove"))) void *memmove(void *dst, const void *src, size_t len) { @@ -56,6 +60,7 @@ void *memmove(void *dst, const void *src, size_t len) #ifndef NOLIBC_ARCH_HAS_MEMCPY /* must be exported, as it's used by libgcc on ARM */ +void *memcpy(void *dst, const void *src, size_t len); __attribute__((weak,unused,section(".text.nolibc_memcpy"))) void *memcpy(void *dst, const void *src, size_t len) { @@ -73,6 +78,7 @@ void *memcpy(void *dst, const void *src, size_t len) /* might be ignored by the compiler without -ffreestanding, then found as * missing. */ +void *memset(void *dst, int b, size_t len); __attribute__((weak,unused,section(".text.nolibc_memset"))) void *memset(void *dst, int b, size_t len) { @@ -124,6 +130,7 @@ char *strcpy(char *dst, const char *src) * thus itself, hence the asm() statement below that's meant to disable this * confusing practice. */ +size_t strlen(const char *str); __attribute__((weak,unused,section(".text.nolibc_strlen"))) size_t strlen(const char *str) { @@ -285,7 +292,40 @@ char *strrchr(const char *s, int c) return (char *)ret; } -/* make sure to include all global symbols */ -#include "nolibc.h" +static __attribute__((unused)) +char *strstr(const char *haystack, const char *needle) +{ + size_t len_haystack, len_needle; + + len_needle = strlen(needle); + if (!len_needle) + return NULL; + + len_haystack = strlen(haystack); + while (len_haystack >= len_needle) { + if (!memcmp(haystack, needle, len_needle)) + return (char *)haystack; + haystack++; + len_haystack--; + } + + return NULL; +} + +static __attribute__((unused)) +int tolower(int c) +{ + if (c >= 'A' && c <= 'Z') + return c - 'A' + 'a'; + return c; +} + +static __attribute__((unused)) +int toupper(int c) +{ + if (c >= 'a' && c <= 'z') + return c - 'a' + 'A'; + return c; +} #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 
d4a5c2399a66..9556c69a6ae1 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -4,28 +4,26 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SYS_H #define _NOLIBC_SYS_H #include "std.h" /* system includes */ -#include <asm/unistd.h> -#include <asm/signal.h> /* for SIGCHLD */ -#include <asm/ioctls.h> -#include <asm/mman.h> +#include <linux/unistd.h> +#include <linux/signal.h> /* for SIGCHLD */ +#include <linux/termios.h> +#include <linux/mman.h> #include <linux/fs.h> #include <linux/loop.h> #include <linux/time.h> #include <linux/auxvec.h> #include <linux/fcntl.h> /* for O_* and AT_* */ #include <linux/stat.h> /* for statx() */ -#include <linux/prctl.h> -#include <linux/resource.h> -#include <linux/utsname.h> -#include <linux/signal.h> -#include "arch.h" #include "errno.h" #include "stdarg.h" #include "types.h" @@ -302,11 +300,17 @@ void sys_exit(int status) } static __attribute__((noreturn,unused)) -void exit(int status) +void _exit(int status) { sys_exit(status); } +static __attribute__((noreturn,unused)) +void exit(int status) +{ + _exit(status); +} + /* * pid_t fork(void); @@ -490,27 +494,6 @@ int getpagesize(void) /* - * int gettimeofday(struct timeval *tv, struct timezone *tz); - */ - -static __attribute__((unused)) -int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ -#ifdef __NR_gettimeofday - return my_syscall2(__NR_gettimeofday, tv, tz); -#else - return __nolibc_enosys(__func__, tv, tz); -#endif -} - -static __attribute__((unused)) -int gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return __sysret(sys_gettimeofday(tv, tz)); -} - - -/* * uid_t getuid(void); */ @@ -532,22 +515,6 @@ uid_t getuid(void) /* - * int ioctl(int fd, unsigned long req, void *value); - */ - -static __attribute__((unused)) -int sys_ioctl(int fd, unsigned long req, void *value) -{ - return my_syscall3(__NR_ioctl, fd, req, value); -} - -static 
__attribute__((unused)) -int ioctl(int fd, unsigned long req, void *value) -{ - return __sysret(sys_ioctl(fd, req, value)); -} - -/* * int kill(pid_t pid, int signal); */ @@ -602,9 +569,36 @@ off_t sys_lseek(int fd, off_t offset, int whence) } static __attribute__((unused)) +int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low, + __kernel_loff_t *result, int whence) +{ +#ifdef __NR_llseek + return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence); +#else + return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence); +#endif +} + +static __attribute__((unused)) off_t lseek(int fd, off_t offset, int whence) { - return __sysret(sys_lseek(fd, offset, whence)); + __kernel_loff_t loff = 0; + off_t result; + int ret; + + result = sys_lseek(fd, offset, whence); + if (result == -ENOSYS) { + /* Only exists on 32bit where nolibc off_t is also 32bit */ + ret = sys_llseek(fd, 0, offset, &loff, whence); + if (ret < 0) + result = ret; + else if (loff != (off_t)loff) + result = -EOVERFLOW; + else + result = loff; + } + + return __sysret(result); } @@ -675,106 +669,6 @@ int mknod(const char *path, mode_t mode, dev_t dev) return __sysret(sys_mknod(path, mode, dev)); } -#ifndef sys_mmap -static __attribute__((unused)) -void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, - off_t offset) -{ - int n; - -#if defined(__NR_mmap2) - n = __NR_mmap2; - offset >>= 12; -#else - n = __NR_mmap; -#endif - - return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); -} -#endif - -/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() - * which returns -1 upon error and still satisfy user land that checks for - * MAP_FAILED. 
- */ - -static __attribute__((unused)) -void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) -{ - void *ret = sys_mmap(addr, length, prot, flags, fd, offset); - - if ((unsigned long)ret >= -4095UL) { - SET_ERRNO(-(long)ret); - ret = MAP_FAILED; - } - return ret; -} - -static __attribute__((unused)) -int sys_munmap(void *addr, size_t length) -{ - return my_syscall2(__NR_munmap, addr, length); -} - -static __attribute__((unused)) -int munmap(void *addr, size_t length) -{ - return __sysret(sys_munmap(addr, length)); -} - -/* - * int mount(const char *source, const char *target, - * const char *fstype, unsigned long flags, - * const void *data); - */ -static __attribute__((unused)) -int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return my_syscall5(__NR_mount, src, tgt, fst, flags, data); -} - -static __attribute__((unused)) -int mount(const char *src, const char *tgt, - const char *fst, unsigned long flags, - const void *data) -{ - return __sysret(sys_mount(src, tgt, fst, flags, data)); -} - - -/* - * int open(const char *path, int flags[, mode_t mode]); - */ - -static __attribute__((unused)) -int sys_open(const char *path, int flags, mode_t mode) -{ -#ifdef __NR_openat - return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -#elif defined(__NR_open) - return my_syscall3(__NR_open, path, flags, mode); -#else - return __nolibc_enosys(__func__, path, flags, mode); -#endif -} - -static __attribute__((unused)) -int open(const char *path, int flags, ...) 
-{ - mode_t mode = 0; - - if (flags & O_CREAT) { - va_list args; - - va_start(args, flags); - mode = va_arg(args, int); - va_end(args); - } - - return __sysret(sys_open(path, flags, mode)); -} - /* * int pipe2(int pipefd[2], int flags); @@ -801,26 +695,6 @@ int pipe(int pipefd[2]) /* - * int prctl(int option, unsigned long arg2, unsigned long arg3, - * unsigned long arg4, unsigned long arg5); - */ - -static __attribute__((unused)) -int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); -} - -static __attribute__((unused)) -int prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); -} - - -/* * int pivot_root(const char *new, const char *old); */ @@ -838,35 +712,6 @@ int pivot_root(const char *new, const char *old) /* - * int poll(struct pollfd *fds, int nfds, int timeout); - */ - -static __attribute__((unused)) -int sys_poll(struct pollfd *fds, int nfds, int timeout) -{ -#if defined(__NR_ppoll) - struct timespec t; - - if (timeout >= 0) { - t.tv_sec = timeout / 1000; - t.tv_nsec = (timeout % 1000) * 1000000; - } - return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? 
&t : NULL, NULL, 0); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); -#else - return __nolibc_enosys(__func__, fds, nfds, timeout); -#endif -} - -static __attribute__((unused)) -int poll(struct pollfd *fds, int nfds, int timeout) -{ - return __sysret(sys_poll(fds, nfds, timeout)); -} - - -/* * ssize_t read(int fd, void *buf, size_t count); */ @@ -884,61 +729,6 @@ ssize_t read(int fd, void *buf, size_t count) /* - * int reboot(int cmd); - * <cmd> is among LINUX_REBOOT_CMD_* - */ - -static __attribute__((unused)) -ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) -{ - return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); -} - -static __attribute__((unused)) -int reboot(int cmd) -{ - return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); -} - - -/* - * int getrlimit(int resource, struct rlimit *rlim); - * int setrlimit(int resource, const struct rlimit *rlim); - */ - -static __attribute__((unused)) -int sys_prlimit64(pid_t pid, int resource, - const struct rlimit64 *new_limit, struct rlimit64 *old_limit) -{ - return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); -} - -static __attribute__((unused)) -int getrlimit(int resource, struct rlimit *rlim) -{ - struct rlimit64 rlim64; - int ret; - - ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); - rlim->rlim_cur = rlim64.rlim_cur; - rlim->rlim_max = rlim64.rlim_max; - - return ret; -} - -static __attribute__((unused)) -int setrlimit(int resource, const struct rlimit *rlim) -{ - struct rlimit64 rlim64 = { - .rlim_cur = rlim->rlim_cur, - .rlim_max = rlim->rlim_max, - }; - - return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); -} - - -/* * int sched_yield(void); */ @@ -982,6 +772,14 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva t.tv_nsec = timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? 
&t : NULL, NULL); +#elif defined(__NR_pselect6_time64) + struct __kernel_timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); #else return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout); #endif @@ -1010,77 +808,31 @@ int setpgid(pid_t pid, pid_t pgid) return __sysret(sys_setpgid(pid, pgid)); } - /* - * pid_t setsid(void); + * pid_t setpgrp(void) */ static __attribute__((unused)) -pid_t sys_setsid(void) +pid_t setpgrp(void) { - return my_syscall0(__NR_setsid); + return setpgid(0, 0); } -static __attribute__((unused)) -pid_t setsid(void) -{ - return __sysret(sys_setsid()); -} /* - * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); - * int stat(const char *path, struct stat *buf); + * pid_t setsid(void); */ static __attribute__((unused)) -int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) -{ -#ifdef __NR_statx - return my_syscall5(__NR_statx, fd, path, flags, mask, buf); -#else - return __nolibc_enosys(__func__, fd, path, flags, mask, buf); -#endif -} - -static __attribute__((unused)) -int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +pid_t sys_setsid(void) { - return __sysret(sys_statx(fd, path, flags, mask, buf)); + return my_syscall0(__NR_setsid); } - static __attribute__((unused)) -int stat(const char *path, struct stat *buf) +pid_t setsid(void) { - struct statx statx; - long ret; - - ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); - if (ret == -1) - return ret; - - buf->st_dev = ((statx.stx_dev_minor & 0xff) - | (statx.stx_dev_major << 8) - | ((statx.stx_dev_minor & ~0xff) << 12)); - buf->st_ino = statx.stx_ino; - buf->st_mode = statx.stx_mode; - buf->st_nlink = statx.stx_nlink; - buf->st_uid = statx.stx_uid; - buf->st_gid = statx.stx_gid; - buf->st_rdev = 
((statx.stx_rdev_minor & 0xff) - | (statx.stx_rdev_major << 8) - | ((statx.stx_rdev_minor & ~0xff) << 12)); - buf->st_size = statx.stx_size; - buf->st_blksize = statx.stx_blksize; - buf->st_blocks = statx.stx_blocks; - buf->st_atim.tv_sec = statx.stx_atime.tv_sec; - buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; - buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; - buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; - buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; - buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; - - return 0; + return __sysret(sys_setsid()); } @@ -1142,32 +894,6 @@ int umount2(const char *path, int flags) /* - * int uname(struct utsname *buf); - */ - -struct utsname { - char sysname[65]; - char nodename[65]; - char release[65]; - char version[65]; - char machine[65]; - char domainname[65]; -}; - -static __attribute__((unused)) -int sys_uname(struct utsname *buf) -{ - return my_syscall1(__NR_uname, buf); -} - -static __attribute__((unused)) -int uname(struct utsname *buf) -{ - return __sysret(sys_uname(buf)); -} - - -/* * int unlink(const char *path); */ @@ -1191,59 +917,6 @@ int unlink(const char *path) /* - * pid_t wait(int *status); - * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); - * pid_t waitpid(pid_t pid, int *status, int options); - */ - -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ -#ifdef __NR_wait4 - return my_syscall4(__NR_wait4, pid, status, options, rusage); -#else - return __nolibc_enosys(__func__, pid, status, options, rusage); -#endif -} - -static __attribute__((unused)) -pid_t wait(int *status) -{ - return __sysret(sys_wait4(-1, status, 0, NULL)); -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return __sysret(sys_wait4(pid, status, options, rusage)); -} - - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - return __sysret(sys_wait4(pid, 
status, options, NULL)); -} - - -/* - * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); - */ - -static __attribute__((unused)) -int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) -{ - return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); -} - -static __attribute__((unused)) -int waitid(int which, pid_t pid, siginfo_t *infop, int options) -{ - return __sysret(sys_waitid(which, pid, infop, options, NULL)); -} - - -/* * ssize_t write(int fd, const void *buf, size_t count); */ @@ -1276,7 +949,4 @@ int memfd_create(const char *name, unsigned int flags) return __sysret(sys_memfd_create(name, flags)); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h new file mode 100644 index 000000000000..c52463d6c18d --- /dev/null +++ b/tools/include/nolibc/sys/auxv.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * auxv definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_AUXV_H +#define _NOLIBC_SYS_AUXV_H + +#include "../crt.h" + +static __attribute__((unused)) +unsigned long getauxval(unsigned long type) +{ + const unsigned long *auxv = _auxv; + unsigned long ret; + + if (!auxv) + return 0; + + while (1) { + if (!auxv[0] && !auxv[1]) { + ret = 0; + break; + } + + if (auxv[0] == type) { + ret = auxv[1]; + break; + } + + auxv += 2; + } + + return ret; +} + +#endif /* _NOLIBC_SYS_AUXV_H */ diff --git a/tools/include/nolibc/sys/ioctl.h b/tools/include/nolibc/sys/ioctl.h new file mode 100644 index 000000000000..fc880687e02a --- /dev/null +++ b/tools/include/nolibc/sys/ioctl.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Ioctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* 
make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_IOCTL_H +#define _NOLIBC_SYS_IOCTL_H + +#include "../sys.h" + +#include <linux/ioctl.h> + +/* + * int ioctl(int fd, unsigned long cmd, ... arg); + */ + +static __attribute__((unused)) +long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + return my_syscall3(__NR_ioctl, fd, cmd, arg); +} + +#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg))) + +#endif /* _NOLIBC_SYS_IOCTL_H */ diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h new file mode 100644 index 000000000000..5228751b458c --- /dev/null +++ b/tools/include/nolibc/sys/mman.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * mm definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MMAN_H +#define _NOLIBC_SYS_MMAN_H + +#include "../arch.h" +#include "../sys.h" + +#ifndef sys_mmap +static __attribute__((unused)) +void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) +{ + int n; + +#if defined(__NR_mmap2) + n = __NR_mmap2; + offset >>= 12; +#else + n = __NR_mmap; +#endif + + return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); +} +#endif + +/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() + * which returns -1 upon error and still satisfy user land that checks for + * MAP_FAILED. 
+ */ + +static __attribute__((unused)) +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + void *ret = sys_mmap(addr, length, prot, flags, fd, offset); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +void *sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + return (void *)my_syscall5(__NR_mremap, old_address, old_size, + new_size, flags, new_address); +} + +static __attribute__((unused)) +void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + void *ret = sys_mremap(old_address, old_size, new_size, flags, new_address); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +int sys_munmap(void *addr, size_t length) +{ + return my_syscall2(__NR_munmap, addr, length); +} + +static __attribute__((unused)) +int munmap(void *addr, size_t length) +{ + return __sysret(sys_munmap(addr, length)); +} + +#endif /* _NOLIBC_SYS_MMAN_H */ diff --git a/tools/include/nolibc/sys/mount.h b/tools/include/nolibc/sys/mount.h new file mode 100644 index 000000000000..e39ec02ea24c --- /dev/null +++ b/tools/include/nolibc/sys/mount.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Mount definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MOUNT_H +#define _NOLIBC_SYS_MOUNT_H + +#include "../sys.h" + +#include <linux/mount.h> + +/* + * int mount(const char *source, const char *target, + * const char *fstype, unsigned long flags, + * const void *data); + */ +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, 
flags, data); +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + return __sysret(sys_mount(src, tgt, fst, flags, data)); +} + +#endif /* _NOLIBC_SYS_MOUNT_H */ diff --git a/tools/include/nolibc/sys/prctl.h b/tools/include/nolibc/sys/prctl.h new file mode 100644 index 000000000000..0205907b6ac8 --- /dev/null +++ b/tools/include/nolibc/sys/prctl.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Prctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_PRCTL_H +#define _NOLIBC_SYS_PRCTL_H + +#include "../sys.h" + +#include <linux/prctl.h> + +/* + * int prctl(int option, unsigned long arg2, unsigned long arg3, + * unsigned long arg4, unsigned long arg5); + */ + +static __attribute__((unused)) +int sys_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); +} + +static __attribute__((unused)) +int prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); +} + +#endif /* _NOLIBC_SYS_PRCTL_H */ diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h new file mode 100644 index 000000000000..8d9749f1c845 --- /dev/null +++ b/tools/include/nolibc/sys/random.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * random definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RANDOM_H +#define _NOLIBC_SYS_RANDOM_H + +#include "../arch.h" +#include "../sys.h" + +#include <linux/random.h> + +/* + * ssize_t getrandom(void *buf, size_t 
buflen, unsigned int flags); + */ + +static __attribute__((unused)) +ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return my_syscall3(__NR_getrandom, buf, buflen, flags); +} + +static __attribute__((unused)) +ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return __sysret(sys_getrandom(buf, buflen, flags)); +} + +#endif /* _NOLIBC_SYS_RANDOM_H */ diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h new file mode 100644 index 000000000000..4a1e435be669 --- /dev/null +++ b/tools/include/nolibc/sys/reboot.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Reboot definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_REBOOT_H +#define _NOLIBC_SYS_REBOOT_H + +#include "../sys.h" + +#include <linux/reboot.h> + +/* + * int reboot(int cmd); + * <cmd> is among LINUX_REBOOT_CMD_* + */ + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); +} + +#endif /* _NOLIBC_SYS_REBOOT_H */ diff --git a/tools/include/nolibc/sys/resource.h b/tools/include/nolibc/sys/resource.h new file mode 100644 index 000000000000..b990f914dc56 --- /dev/null +++ b/tools/include/nolibc/sys/resource.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Resource definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RESOURCE_H +#define _NOLIBC_SYS_RESOURCE_H + +#include "../sys.h" + +#include <linux/resource.h> + +/* + * int getrlimit(int resource, struct rlimit *rlim); + * int setrlimit(int resource, 
const struct rlimit *rlim); + */ + +static __attribute__((unused)) +int sys_prlimit64(pid_t pid, int resource, + const struct rlimit64 *new_limit, struct rlimit64 *old_limit) +{ + return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); +} + +static __attribute__((unused)) +int getrlimit(int resource, struct rlimit *rlim) +{ + struct rlimit64 rlim64; + int ret; + + ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); + rlim->rlim_cur = rlim64.rlim_cur; + rlim->rlim_max = rlim64.rlim_max; + + return ret; +} + +static __attribute__((unused)) +int setrlimit(int resource, const struct rlimit *rlim) +{ + struct rlimit64 rlim64 = { + .rlim_cur = rlim->rlim_cur, + .rlim_max = rlim->rlim_max, + }; + + return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); +} + +#endif /* _NOLIBC_SYS_RESOURCE_H */ diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h new file mode 100644 index 000000000000..8b4d80e3ea03 --- /dev/null +++ b/tools/include/nolibc/sys/stat.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * stat definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_STAT_H +#define _NOLIBC_SYS_STAT_H + +#include "../arch.h" +#include "../types.h" +#include "../sys.h" + +/* + * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); + * int stat(const char *path, struct stat *buf); + * int fstatat(int fd, const char *path, struct stat *buf, int flag); + * int fstat(int fildes, struct stat *buf); + * int lstat(const char *path, struct stat *buf); + */ + +static __attribute__((unused)) +int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ +#ifdef __NR_statx + return my_syscall5(__NR_statx, fd, path, flags, mask, buf); +#else + return __nolibc_enosys(__func__, fd, path, flags, mask, buf); +#endif +} + +static 
__attribute__((unused)) +int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ + return __sysret(sys_statx(fd, path, flags, mask, buf)); +} + + +static __attribute__((unused)) +int fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + struct statx statx; + long ret; + + ret = __sysret(sys_statx(fd, path, flag | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); + if (ret == -1) + return ret; + + buf->st_dev = ((statx.stx_dev_minor & 0xff) + | (statx.stx_dev_major << 8) + | ((statx.stx_dev_minor & ~0xff) << 12)); + buf->st_ino = statx.stx_ino; + buf->st_mode = statx.stx_mode; + buf->st_nlink = statx.stx_nlink; + buf->st_uid = statx.stx_uid; + buf->st_gid = statx.stx_gid; + buf->st_rdev = ((statx.stx_rdev_minor & 0xff) + | (statx.stx_rdev_major << 8) + | ((statx.stx_rdev_minor & ~0xff) << 12)); + buf->st_size = statx.stx_size; + buf->st_blksize = statx.stx_blksize; + buf->st_blocks = statx.stx_blocks; + buf->st_atim.tv_sec = statx.stx_atime.tv_sec; + buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; + buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; + buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; + buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; + buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; + + return 0; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, 0); +} + +static __attribute__((unused)) +int fstat(int fildes, struct stat *buf) +{ + return fstatat(fildes, "", buf, AT_EMPTY_PATH); +} + +static __attribute__((unused)) +int lstat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW); +} + +#endif /* _NOLIBC_SYS_STAT_H */ diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h new file mode 100644 index 000000000000..4bf97f1386a0 --- /dev/null +++ b/tools/include/nolibc/sys/syscall.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * syscall() definition for 
NOLIBC + * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSCALL_H +#define _NOLIBC_SYS_SYSCALL_H + +#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N +#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) +#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) +#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) +#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) + +#endif /* _NOLIBC_SYS_SYSCALL_H */ diff --git a/tools/include/nolibc/sys/sysmacros.h b/tools/include/nolibc/sys/sysmacros.h new file mode 100644 index 000000000000..37c33f030f02 --- /dev/null +++ b/tools/include/nolibc/sys/sysmacros.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Sysmacro definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSMACROS_H +#define _NOLIBC_SYS_SYSMACROS_H + +#include "../std.h" + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) +#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) +#define minor(dev) ((unsigned int)((dev) & 0xff)) + +#endif /* _NOLIBC_SYS_SYSMACROS_H */ diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h new file mode 100644 index 000000000000..33782a19aae9 --- /dev/null +++ b/tools/include/nolibc/sys/time.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * time definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIME_H +#define _NOLIBC_SYS_TIME_H + +#include "../arch.h" +#include "../sys.h" + +static int sys_clock_gettime(clockid_t clockid, 
struct timespec *tp); + +/* + * int gettimeofday(struct timeval *tv, struct timezone *tz); + */ + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ +#ifdef __NR_gettimeofday + return my_syscall2(__NR_gettimeofday, tv, tz); +#else + (void) tz; /* Non-NULL tz is undefined behaviour */ + + struct timespec tp; + int ret; + + ret = sys_clock_gettime(CLOCK_REALTIME, &tp); + if (!ret && tv) { + tv->tv_sec = tp.tv_sec; + tv->tv_usec = tp.tv_nsec / 1000; + } + + return ret; +#endif +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return __sysret(sys_gettimeofday(tv, tz)); +} + +#endif /* _NOLIBC_SYS_TIME_H */ diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h new file mode 100644 index 000000000000..4375d546ba58 --- /dev/null +++ b/tools/include/nolibc/sys/timerfd.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * timerfd definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIMERFD_H +#define _NOLIBC_SYS_TIMERFD_H + +#include "../sys.h" +#include "../time.h" + +#include <linux/timerfd.h> + + +static __attribute__((unused)) +int sys_timerfd_create(int clockid, int flags) +{ + return my_syscall2(__NR_timerfd_create, clockid, flags); +} + +static __attribute__((unused)) +int timerfd_create(int clockid, int flags) +{ + return __sysret(sys_timerfd_create(clockid, flags)); +} + + +static __attribute__((unused)) +int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) +{ +#if defined(__NR_timerfd_gettime) + return my_syscall2(__NR_timerfd_gettime, fd, curr_value); +#elif defined(__NR_timerfd_gettime64) + struct __kernel_itimerspec kcurr_value; + int ret; + + ret = my_syscall2(__NR_timerfd_gettime64, fd, &kcurr_value); + 
__nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); + return ret; +#else + return __nolibc_enosys(__func__, fd, curr_value); +#endif +} + +static __attribute__((unused)) +int timerfd_gettime(int fd, struct itimerspec *curr_value) +{ + return __sysret(sys_timerfd_gettime(fd, curr_value)); +} + + +static __attribute__((unused)) +int sys_timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timerfd_settime) + return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); +#elif defined(__NR_timerfd_settime64) + struct __kernel_itimerspec knew_value, kold_value; + int ret; + + __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); + __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); + ret = my_syscall4(__NR_timerfd_settime64, fd, flags, &knew_value, &kold_value); + if (old_value) { + __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); + __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); + } + return ret; +#else + return __nolibc_enosys(__func__, fd, flags, new_value, old_value); +#endif +} + +static __attribute__((unused)) +int timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(sys_timerfd_settime(fd, flags, new_value, old_value)); +} + +#endif /* _NOLIBC_SYS_TIMERFD_H */ diff --git a/tools/include/nolibc/sys/types.h b/tools/include/nolibc/sys/types.h new file mode 100644 index 000000000000..8a264a13275c --- /dev/null +++ b/tools/include/nolibc/sys/types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sys/types.h shim for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +#include "../types.h" diff --git 
a/tools/include/nolibc/sys/utsname.h b/tools/include/nolibc/sys/utsname.h new file mode 100644 index 000000000000..01023e1bb439 --- /dev/null +++ b/tools/include/nolibc/sys/utsname.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Utsname definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_UTSNAME_H +#define _NOLIBC_SYS_UTSNAME_H + +#include "../sys.h" + +#include <linux/utsname.h> + +/* + * int uname(struct utsname *buf); + */ + +struct utsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + char domainname[65]; +}; + +static __attribute__((unused)) +int sys_uname(struct utsname *buf) +{ + return my_syscall1(__NR_uname, buf); +} + +static __attribute__((unused)) +int uname(struct utsname *buf) +{ + return __sysret(sys_uname(buf)); +} + +#endif /* _NOLIBC_SYS_UTSNAME_H */ diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h new file mode 100644 index 000000000000..4d44e3da0ba8 --- /dev/null +++ b/tools/include/nolibc/sys/wait.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * wait definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_WAIT_H +#define _NOLIBC_SYS_WAIT_H + +#include "../arch.h" +#include "../std.h" +#include "../types.h" + +/* + * pid_t wait(int *status); + * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); + * pid_t waitpid(pid_t pid, int *status, int options); + * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); + */ + +static __attribute__((unused)) +pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ +#ifdef __NR_wait4 + return my_syscall4(__NR_wait4, pid, status, options, rusage); +#else + return 
__nolibc_enosys(__func__, pid, status, options, rusage); +#endif +} + +static __attribute__((unused)) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + return __sysret(sys_wait4(pid, status, options, rusage)); +} + +static __attribute__((unused)) +int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) +{ + return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); +} + +static __attribute__((unused)) +int waitid(int which, pid_t pid, siginfo_t *infop, int options) +{ + return __sysret(sys_waitid(which, pid, infop, options, NULL)); +} + + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + int idtype, ret; + siginfo_t info; + pid_t id; + + if (pid == INT_MIN) { + SET_ERRNO(ESRCH); + return -1; + } else if (pid < -1) { + idtype = P_PGID; + id = -pid; + } else if (pid == -1) { + idtype = P_ALL; + id = 0; + } else if (pid == 0) { + idtype = P_PGID; + id = 0; + } else { + idtype = P_PID; + id = pid; + } + + options |= WEXITED; + + ret = waitid(idtype, id, &info, options); + if (ret) + return ret; + + switch (info.si_code) { + case 0: + *status = 0; + break; + case CLD_EXITED: + *status = (info.si_status & 0xff) << 8; + break; + case CLD_KILLED: + *status = info.si_status & 0x7f; + break; + case CLD_DUMPED: + *status = (info.si_status & 0x7f) | 0x80; + break; + case CLD_STOPPED: + case CLD_TRAPPED: + *status = (info.si_status << 8) + 0x7f; + break; + case CLD_CONTINUED: + *status = 0xffff; + break; + default: + return -1; + } + + return info.si_pid; +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + return waitpid(-1, status, 0); +} + +#endif /* _NOLIBC_SYS_WAIT_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 84655361b9ad..fc387940d51f 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global 
symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TIME_H #define _NOLIBC_TIME_H @@ -12,6 +15,106 @@ #include "types.h" #include "sys.h" +#include <linux/signal.h> +#include <linux/time.h> + +static __inline__ +void __nolibc_timespec_user_to_kernel(const struct timespec *ts, struct __kernel_timespec *kts) +{ + kts->tv_sec = ts->tv_sec; + kts->tv_nsec = ts->tv_nsec; +} + +static __inline__ +void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struct timespec *ts) +{ + ts->tv_sec = kts->tv_sec; + ts->tv_nsec = kts->tv_nsec; +} + +/* + * int clock_getres(clockid_t clockid, struct timespec *res); + * int clock_gettime(clockid_t clockid, struct timespec *tp); + * int clock_settime(clockid_t clockid, const struct timespec *tp); + */ + +static __attribute__((unused)) +int sys_clock_getres(clockid_t clockid, struct timespec *res) +{ +#if defined(__NR_clock_getres) + return my_syscall2(__NR_clock_getres, clockid, res); +#elif defined(__NR_clock_getres_time64) + struct __kernel_timespec kres; + int ret; + + ret = my_syscall2(__NR_clock_getres_time64, clockid, &kres); + if (res) + __nolibc_timespec_kernel_to_user(&kres, res); + return ret; +#else + return __nolibc_enosys(__func__, clockid, res); +#endif +} + +static __attribute__((unused)) +int clock_getres(clockid_t clockid, struct timespec *res) +{ + return __sysret(sys_clock_getres(clockid, res)); +} + +static __attribute__((unused)) +int sys_clock_gettime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_gettime) + return my_syscall2(__NR_clock_gettime, clockid, tp); +#elif defined(__NR_clock_gettime64) + struct __kernel_timespec ktp; + int ret; + + ret = my_syscall2(__NR_clock_gettime64, clockid, &ktp); + if (tp) + __nolibc_timespec_kernel_to_user(&ktp, tp); + return ret; +#else + return __nolibc_enosys(__func__, clockid, tp); +#endif +} + +static __attribute__((unused)) +int clock_gettime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(sys_clock_gettime(clockid, tp)); 
+} + +static __attribute__((unused)) +int sys_clock_settime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_settime) + return my_syscall2(__NR_clock_settime, clockid, tp); +#elif defined(__NR_clock_settime64) + struct __kernel_timespec ktp; + + __nolibc_timespec_user_to_kernel(tp, &ktp); + return my_syscall2(__NR_clock_settime64, clockid, &ktp); +#else + return __nolibc_enosys(__func__, clockid, tp); +#endif +} + +static __attribute__((unused)) +int clock_settime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(sys_clock_settime(clockid, tp)); +} + + +static __inline__ +double difftime(time_t time1, time_t time2) +{ + return time1 - time2; +} + + static __attribute__((unused)) time_t time(time_t *tptr) { @@ -25,7 +128,89 @@ time_t time(time_t *tptr) return tv.tv_sec; } -/* make sure to include all global symbols */ -#include "nolibc.h" + +/* + * int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid); + * int timer_gettime(timer_t timerid, struct itimerspec *curr_value); + * int timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value); + */ + +static __attribute__((unused)) +int sys_timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return my_syscall3(__NR_timer_create, clockid, evp, timerid); +} + +static __attribute__((unused)) +int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return __sysret(sys_timer_create(clockid, evp, timerid)); +} + +static __attribute__((unused)) +int sys_timer_delete(timer_t timerid) +{ + return my_syscall1(__NR_timer_delete, timerid); +} + +static __attribute__((unused)) +int timer_delete(timer_t timerid) +{ + return __sysret(sys_timer_delete(timerid)); +} + +static __attribute__((unused)) +int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ +#if defined(__NR_timer_gettime) + return my_syscall2(__NR_timer_gettime, timerid, curr_value); +#elif 
defined(__NR_timer_gettime64) + struct __kernel_itimerspec kcurr_value; + int ret; + + ret = my_syscall2(__NR_timer_gettime64, timerid, &kcurr_value); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); + return ret; +#else + return __nolibc_enosys(__func__, timerid, curr_value); +#endif +} + +static __attribute__((unused)) +int timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ + return __sysret(sys_timer_gettime(timerid, curr_value)); +} + +static __attribute__((unused)) +int sys_timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timer_settime) + return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); +#elif defined(__NR_timer_settime64) + struct __kernel_itimerspec knew_value, kold_value; + int ret; + + __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); + __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); + ret = my_syscall4(__NR_timer_settime64, timerid, flags, &knew_value, &kold_value); + if (old_value) { + __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); + __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); + } + return ret; +#else + return __nolibc_enosys(__func__, timerid, flags, new_value, old_value); +#endif +} + +static __attribute__((unused)) +int timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(sys_timer_settime(timerid, flags, new_value, old_value)); +} #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index b26a5d0c417c..30904be544ed 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -4,16 +4,17 @@ * Copyright (C) 2017-2021 Willy Tarreau 
<w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TYPES_H #define _NOLIBC_TYPES_H #include "std.h" #include <linux/mman.h> -#include <linux/reboot.h> /* for LINUX_REBOOT_* */ #include <linux/stat.h> #include <linux/time.h> #include <linux/wait.h> -#include <linux/resource.h> /* Only the generic macros and types may be defined here. The arch-specific @@ -156,20 +157,6 @@ typedef struct { __set->fds[__idx] = 0; \ } while (0) -/* for poll() */ -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 - -struct pollfd { - int fd; - short int events; - short int revents; -}; - /* for getdents64() */ struct linux_dirent64 { uint64_t d_ino; @@ -198,14 +185,8 @@ struct stat { union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */ }; -/* WARNING, it only deals with the 4096 first majors and 256 first minors */ -#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) -#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) -#define minor(dev) ((unsigned int)(((dev) & 0xff)) - -#ifndef offsetof -#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) -#endif +typedef __kernel_clockid_t clockid_t; +typedef int timer_t; #ifndef container_of #define container_of(PTR, TYPE, FIELD) ({ \ @@ -214,7 +195,4 @@ struct stat { }) #endif -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index e38f3660c051..25bfc7732ec7 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_UNISTD_H #define _NOLIBC_UNISTD_H @@ -17,6 +20,34 @@ #define STDOUT_FILENO 1 #define STDERR_FILENO 2 +#define F_OK 0 
+#define X_OK 1 +#define W_OK 2 +#define R_OK 4 + +/* + * int access(const char *path, int amode); + * int faccessat(int fd, const char *path, int amode, int flag); + */ + +static __attribute__((unused)) +int sys_faccessat(int fd, const char *path, int amode, int flag) +{ + return my_syscall4(__NR_faccessat, fd, path, amode, flag); +} + +static __attribute__((unused)) +int faccessat(int fd, const char *path, int amode, int flag) +{ + return __sysret(sys_faccessat(fd, path, amode, flag)); +} + +static __attribute__((unused)) +int access(const char *path, int amode) +{ + return faccessat(AT_FDCWD, path, amode, 0); +} + static __attribute__((unused)) int msleep(unsigned int msecs) @@ -56,13 +87,4 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } -#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N -#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) -#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) -#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) -#define syscall(...) 
_syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) - -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86..ef1c27fa3c57 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index ffff554a5230..f333a0ac4ee4 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -119,14 +119,33 @@ #define SO_DETACH_REUSEPORT_BPF 68 +#define SO_PREFER_BUSY_POLL 69 +#define SO_BUSY_POLL_BUDGET 70 + +#define SO_NETNS_COOKIE 71 + +#define SO_BUF_LOCK 72 + +#define SO_RESERVE_MEM 73 + +#define SO_TXREHASH 74 + #define SO_RCVMARK 75 #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 -#define SCM_TS_OPT_ID 78 +#define SO_DEVMEM_LINEAR 78 +#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR +#define SO_DEVMEM_DMABUF 79 +#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF +#define SO_DEVMEM_DONTNEED 80 + +#define SCM_TS_OPT_ID 81 + +#define SO_RCVPRIORITY 82 -#define SO_RCVPRIORITY 79 +#define SO_PASSRIGHTS 83 #if !defined(__KERNEL__) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca..2892a45023af 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls 
-#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2acf9b336371..85180e4aaa5a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -51,6 +51,9 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +#define BPF_LOAD_ACQ 0x100 /* load-acquire */ +#define BPF_STORE_REL 0x110 /* store-release */ + enum bpf_cond_pseudo_jmp { BPF_MAY_GOTO = 0, }; @@ -1207,6 +1210,7 @@ enum bpf_perf_event_type { #define BPF_F_BEFORE (1U << 3) #define BPF_F_AFTER (1U << 4) #define BPF_F_ID (1U << 5) +#define BPF_F_PREORDER (1U << 6) #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the @@ -1502,7 +1506,7 @@ union bpf_attr { __s32 map_token_fd; }; - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ __u32 map_fd; __aligned_u64 key; union { @@ -1648,6 +1652,7 @@ union bpf_attr { }; __u32 next_id; __u32 open_flags; + __s32 fd_by_id_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */ @@ -1990,11 +1995,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. 
+ * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -2046,7 +2055,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -4963,6 +4972,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The @@ -6019,7 +6031,10 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ - /* */ + /* This helper list is effectively frozen. If you are trying to \ + * add a new helper, you should add a kfunc instead which has \ + * less stability guarantees. 
See Documentation/bpf/kfuncs.rst \ + */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't * know or care about integer value that is now passed as second argument @@ -6712,6 +6727,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6913,6 +6929,12 @@ enum { BPF_SOCK_OPS_ALL_CB_FLAGS = 0x7F, }; +enum { + SK_BPF_CB_TX_TIMESTAMPING = 1<<0, + SK_BPF_CB_MASK = (SK_BPF_CB_TX_TIMESTAMPING - 1) | + SK_BPF_CB_TX_TIMESTAMPING +}; + /* List of known BPF sock_ops operators. * New entries can only be added at the end */ @@ -7025,6 +7047,29 @@ enum { * by the kernel or the * earlier bpf-progs. */ + BPF_SOCK_OPS_TSTAMP_SCHED_CB, /* Called when skb is passing + * through dev layer when + * SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SND_SW_CB, /* Called when skb is about to send + * to the nic when SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SND_HW_CB, /* Called in hardware phase when + * SK_BPF_CB_TX_TIMESTAMPING feature + * is on. + */ + BPF_SOCK_OPS_TSTAMP_ACK_CB, /* Called when all the skbs in the + * same sendmsg call are acked + * when SK_BPF_CB_TX_TIMESTAMPING + * feature is on. + */ + BPF_SOCK_OPS_TSTAMP_SENDMSG_CB, /* Called when every sendmsg syscall + * is triggered. It's used to correlate + * sendmsg timestamp with corresponding + * tskey. + */ }; /* List of TCP states. 
There is a build check in net/ipv4/tcp.c to detect @@ -7091,6 +7136,7 @@ enum { TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ + SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */ }; enum { diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index ec1798b6d3ff..266d4ffa6c07 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -36,7 +36,8 @@ struct btf_type { * bits 24-28: kind (e.g. int, ptr, array...etc) * bits 29-30: unused * bit 31: kind_flag, currently used by - * struct, union, enum, fwd and enum64 + * struct, union, enum, fwd, enum64, + * decl_tag and type_tag */ __u32 info; /* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64. diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index e16be0d37746..b8f629ef135f 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -33,7 +33,7 @@ * Missing asm support * * __BIT128() would not work in the asm code, as it shifts an - * 'unsigned __init128' data type as direct representation of + * 'unsigned __int128' data type as direct representation of * 128 bit constants is not supported in the gcc compiler, as * they get silently truncated. * diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h new file mode 100644 index 000000000000..5834b83d7f9a --- /dev/null +++ b/tools/include/uapi/linux/elf.h @@ -0,0 +1,524 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_ELF_H +#define _LINUX_ELF_H + +#include <linux/types.h> +#include <linux/elf-em.h> + +/* 32-bit ELF base types. */ +typedef __u32 Elf32_Addr; +typedef __u16 Elf32_Half; +typedef __u32 Elf32_Off; +typedef __s32 Elf32_Sword; +typedef __u32 Elf32_Word; +typedef __u16 Elf32_Versym; + +/* 64-bit ELF base types. 
*/ +typedef __u64 Elf64_Addr; +typedef __u16 Elf64_Half; +typedef __s16 Elf64_SHalf; +typedef __u64 Elf64_Off; +typedef __s32 Elf64_Sword; +typedef __u32 Elf64_Word; +typedef __u64 Elf64_Xword; +typedef __s64 Elf64_Sxword; +typedef __u16 Elf64_Versym; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_TLS 7 /* Thread local storage segment */ +#define PT_LOOS 0x60000000 /* OS-specific */ +#define PT_HIOS 0x6fffffff /* OS-specific */ +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff +#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550) +#define PT_GNU_STACK (PT_LOOS + 0x474e551) +#define PT_GNU_RELRO (PT_LOOS + 0x474e552) +#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) + + +/* ARM MTE memory tag segment type */ +#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2) + +/* + * Extended Numbering + * + * If the real number of program header table entries is larger than + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the + * section header at index 0, and PN_XNUM is set to e_phnum + * field. Otherwise, the section header at index 0 is zero + * initialized, if it exists. + * + * Specifications are available in: + * + * - Oracle: Linker and Libraries. + * Part No: 817–1984–19, August 2011. + * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf + * + * - System V ABI AMD64 Architecture Processor Supplement + * Draft Version 0.99.4, + * January 13, 2010. 
+ * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf + */ +#define PN_XNUM 0xffff + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* This is the info that is needed to parse the dynamic section of the file */ +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_INIT 12 +#define DT_FINI 13 +#define DT_SONAME 14 +#define DT_RPATH 15 +#define DT_SYMBOLIC 16 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_RELENT 19 +#define DT_PLTREL 20 +#define DT_DEBUG 21 +#define DT_TEXTREL 22 +#define DT_JMPREL 23 +#define DT_ENCODING 32 +#define OLD_DT_LOOS 0x60000000 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_GNU_HASH 0x6ffffef5 +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff +#define OLD_DT_HIOS 0x6fffffff +#define DT_LOPROC 0x70000000 +#define DT_HIPROC 0x7fffffff + +/* This info is needed when parsing the symbol table */ +#define STB_LOCAL 0 +#define STB_GLOBAL 1 +#define STB_WEAK 2 + +#define STN_UNDEF 0 + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 +#define STT_COMMON 5 +#define STT_TLS 6 + +#define VER_FLG_BASE 0x1 +#define VER_FLG_WEAK 0x2 + +#define ELF_ST_BIND(x) ((x) >> 4) +#define ELF_ST_TYPE(x) ((x) & 0xf) +#define ELF32_ST_BIND(x) ELF_ST_BIND(x) +#define 
ELF32_ST_TYPE(x) ELF_ST_TYPE(x) +#define ELF64_ST_BIND(x) ELF_ST_BIND(x) +#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) + +typedef struct { + Elf32_Sword d_tag; + union { + Elf32_Sword d_val; + Elf32_Addr d_ptr; + } d_un; +} Elf32_Dyn; + +typedef struct { + Elf64_Sxword d_tag; /* entry tag value */ + union { + Elf64_Xword d_val; + Elf64_Addr d_ptr; + } d_un; +} Elf64_Dyn; + +/* The following are used with relocations */ +#define ELF32_R_SYM(x) ((x) >> 8) +#define ELF32_R_TYPE(x) ((x) & 0xff) + +#define ELF64_R_SYM(i) ((i) >> 32) +#define ELF64_R_TYPE(i) ((i) & 0xffffffff) + +typedef struct elf32_rel { + Elf32_Addr r_offset; + Elf32_Word r_info; +} Elf32_Rel; + +typedef struct elf64_rel { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ +} Elf64_Rel; + +typedef struct elf32_rela { + Elf32_Addr r_offset; + Elf32_Word r_info; + Elf32_Sword r_addend; +} Elf32_Rela; + +typedef struct elf64_rela { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ + Elf64_Sxword r_addend; /* Constant addend used to compute value */ +} Elf64_Rela; + +typedef struct elf32_sym { + Elf32_Word st_name; + Elf32_Addr st_value; + Elf32_Word st_size; + unsigned char st_info; + unsigned char st_other; + Elf32_Half st_shndx; +} Elf32_Sym; + +typedef struct elf64_sym { + Elf64_Word st_name; /* Symbol name, index in string tbl */ + unsigned char st_info; /* Type and binding attributes */ + unsigned char st_other; /* No defined meaning, 0 */ + Elf64_Half st_shndx; /* Associated section index */ + Elf64_Addr st_value; /* Value of the symbol */ + Elf64_Xword st_size; /* Associated symbol size */ +} Elf64_Sym; + + +#define EI_NIDENT 16 + +typedef struct elf32_hdr { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word 
e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +/* sh_type */ +#define SHT_NULL 0 +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_RELA 4 +#define SHT_HASH 5 +#define SHT_DYNAMIC 6 +#define SHT_NOTE 7 +#define SHT_NOBITS 8 +#define SHT_REL 9 +#define SHT_SHLIB 10 +#define SHT_DYNSYM 11 +#define SHT_NUM 12 +#define SHT_LOPROC 0x70000000 +#define SHT_HIPROC 0x7fffffff +#define SHT_LOUSER 0x80000000 +#define SHT_HIUSER 0xffffffff + +/* sh_flags */ +#define SHF_WRITE 0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 +#define SHF_RELA_LIVEPATCH 
0x00100000 +#define SHF_RO_AFTER_INIT 0x00200000 +#define SHF_MASKPROC 0xf0000000 + +/* special section indexes */ +#define SHN_UNDEF 0 +#define SHN_LORESERVE 0xff00 +#define SHN_LOPROC 0xff00 +#define SHN_HIPROC 0xff1f +#define SHN_LIVEPATCH 0xff20 +#define SHN_ABS 0xfff1 +#define SHN_COMMON 0xfff2 +#define SHN_HIRESERVE 0xffff + +typedef struct elf32_shdr { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +} Elf32_Shdr; + +typedef struct elf64_shdr { + Elf64_Word sh_name; /* Section name, index in string tbl */ + Elf64_Word sh_type; /* Type of section */ + Elf64_Xword sh_flags; /* Miscellaneous section attributes */ + Elf64_Addr sh_addr; /* Section virtual addr at execution */ + Elf64_Off sh_offset; /* Section file offset */ + Elf64_Xword sh_size; /* Size of section in bytes */ + Elf64_Word sh_link; /* Index of another section */ + Elf64_Word sh_info; /* Additional section information */ + Elf64_Xword sh_addralign; /* Section alignment */ + Elf64_Xword sh_entsize; /* Entry size if section holds table */ +} Elf64_Shdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +#define ELFOSABI_NONE 0 +#define ELFOSABI_LINUX 3 + +#ifndef ELF_OSABI +#define ELF_OSABI ELFOSABI_NONE +#endif + +/* + * Notes used in 
ET_CORE. Architectures export some of the arch register sets + * using the corresponding note types via the PTRACE_GETREGSET and + * PTRACE_SETREGSET requests. + * The note name for these types is "LINUX", except NT_PRFPREG that is named + * "CORE". + */ +#define NT_PRSTATUS 1 +#define NT_PRFPREG 2 +#define NT_PRPSINFO 3 +#define NT_TASKSTRUCT 4 +#define NT_AUXV 6 +/* + * Note to userspace developers: size of NT_SIGINFO note may increase + * in the future to accommodate more fields, don't assume it is fixed! + */ +#define NT_SIGINFO 0x53494749 +#define NT_FILE 0x46494c45 +#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ +#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ +#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ +#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ +#define NT_PPC_TAR 0x103 /* Target Address Register */ +#define NT_PPC_PPR 0x104 /* Program Priority Register */ +#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */ +#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */ +#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */ +#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */ +#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */ +#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */ +#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */ +#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */ +#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */ +#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */ +#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */ +#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */ +#define NT_PPC_DEXCR 0x111 /* PowerPC DEXCR registers */ +#define NT_PPC_HASHKEYR 0x112 /* PowerPC HASHKEYR register */ +#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ +#define NT_386_IOPERM 0x201 /* x86 io permission
bitmap (1=deny) */ +#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ +/* Old binutils treats 0x203 as a CET state */ +#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */ +#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ +#define NT_S390_TIMER 0x301 /* s390 timer register */ +#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ +#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */ +#define NT_S390_CTRS 0x304 /* s390 control registers */ +#define NT_S390_PREFIX 0x305 /* s390 prefix register */ +#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ +#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ +#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ +#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ +#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ +#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */ +#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ +#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ +#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ +#define NT_ARM_TLS 0x401 /* ARM TLS register */ +#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ +#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ +#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ +#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ +#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ +#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ +#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr 
auth enabled keys (prctl()) */ +#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ +#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ +#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ +#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ +#define NT_ARM_POE 0x40f /* ARM POE registers */ +#define NT_ARM_GCS 0x410 /* ARM GCS state */ +#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ +#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ +#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ +#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ +#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ +#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ +#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */ +#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ +#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ +#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ +#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */ +#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */ +#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */ +#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */ + +/* Note types with note name "GNU" */ +#define NT_GNU_PROPERTY_TYPE_0 5 + +/* Note header in a PT_NOTE section */ +typedef struct elf32_note { + Elf32_Word n_namesz; /* Name size */ + Elf32_Word n_descsz; /* Content size */ + Elf32_Word n_type; /* Content type */ +} Elf32_Nhdr; + +/* Note header in a PT_NOTE section */ +typedef struct elf64_note { + Elf64_Word n_namesz; /* Name size */ + Elf64_Word n_descsz; /* Content size */ + Elf64_Word n_type; /* Content type */ +} Elf64_Nhdr; + +/* .note.gnu.property types for EM_AARCH64: */ +#define 
GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + +typedef struct { + Elf32_Half vd_version; + Elf32_Half vd_flags; + Elf32_Half vd_ndx; + Elf32_Half vd_cnt; + Elf32_Word vd_hash; + Elf32_Word vd_aux; + Elf32_Word vd_next; +} Elf32_Verdef; + +typedef struct { + Elf64_Half vd_version; + Elf64_Half vd_flags; + Elf64_Half vd_ndx; + Elf64_Half vd_cnt; + Elf64_Word vd_hash; + Elf64_Word vd_aux; + Elf64_Word vd_next; +} Elf64_Verdef; + +typedef struct { + Elf32_Word vda_name; + Elf32_Word vda_next; +} Elf32_Verdaux; + +typedef struct { + Elf64_Word vda_name; + Elf64_Word vda_next; +} Elf64_Verdaux; + +#endif /* _LINUX_ELF_H */ diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h new file mode 100644 index 000000000000..e710967c7c26 --- /dev/null +++ b/tools/include/uapi/linux/fanotify.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FANOTIFY_H +#define _UAPI_LINUX_FANOTIFY_H + +#include <linux/types.h> + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted */ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ + +#define FAN_Q_OVERFLOW 0x00004000 /* Event queue overflowed */
+#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ + +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ + +#define FAN_RENAME 0x10000000 /* File was renamed */ + +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ + +/* flags used for fanotify_init() */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! 
*/ +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 +#define FAN_ENABLE_AUDIT 0x00000040 + +/* Flags to determine fanotify event format */ +#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ +#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ +#define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ + +/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) + +/* flags used for fanotify_modify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 +/* FAN_MARK_MOUNT is 0x00000010 */ +#define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 + +/* These are NOT bitwise flags. Both bits can be used together.
*/ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 + +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All events which require a permission response from userspace + */ +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + +/* Deprecated - do not use this in programs and do not add new flags here! 
*/ +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 3 + +struct fanotify_event_metadata { + __u32 event_len; + __u8 vers; + __u8 reserved; + __u16 metadata_len; + __aligned_u64 mask; + __s32 fd; + __s32 pid; +}; + +#define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 +#define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 +#define FAN_EVENT_INFO_TYPE_MNT 7 + +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* + * Unique file identifier info record. + * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID, + * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME. + * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated + * name immediately after the file handle. + */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[]; +}; + +/* + * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. + * It holds a pidfd for the pid that was responsible for generating an event. 
+ */ +struct fanotify_event_info_pidfd { + struct fanotify_event_info_header hdr; + __s32 pidfd; +}; + +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + +/* + * User space may need to record additional information about its decision. + * The extra information type records what kind of information is included. + * The default is none. We also define an extra information buffer whose + * size is determined by the extra information type. + * + * If the information type is Audit Rule, then the information following + * is the rule number that triggered the user space decision that + * requires auditing. + */ + +#define FAN_RESPONSE_INFO_NONE 0 +#define FAN_RESPONSE_INFO_AUDIT_RULE 1 + +struct fanotify_response { + __s32 fd; + __u32 response; +}; + +struct fanotify_response_info_header { + __u8 type; + __u8 pad; + __u16 len; +}; + +struct fanotify_response_info_audit_rule { + struct fanotify_response_info_header hdr; + __u32 rule_number; + __u32 subj_trust; + __u32 obj_trust; +}; + +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 +/* errno other than EPERM can be specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + +#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ +#define FAN_INFO 0x20 /* Bitmask to indicate additional information */ + +/* No fd set in event */ +#define FAN_NOFD -1 +#define FAN_NOPIDFD FAN_NOFD +#define FAN_EPIDFD -2 + +/* Helper functions to deal with fanotify_event_metadata buffers 
*/ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + +#endif /* _UAPI_LINUX_FANOTIFY_H */ diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 42ec5ddaab8d..44f2bb93e7e6 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -7,8 +7,8 @@ * Magnus Karlsson <magnus.karlsson@intel.com> */ -#ifndef _LINUX_IF_XDP_H -#define _LINUX_IF_XDP_H +#ifndef _UAPI_LINUX_IF_XDP_H +#define _UAPI_LINUX_IF_XDP_H #include <linux/types.h> @@ -127,6 +127,12 @@ struct xdp_options { */ #define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) +/* Request launch time hardware offload. The device will schedule the packet for + * transmission at a pre-determined time called launch time. The value of + * launch time is communicated via launch_time field of struct xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_LAUNCH_TIME (1 << 2) + /* AF_XDP offloads request. 'request' union member is consumed by the driver * when the packet is being transmitted. 'completion' union member is * filled by the driver when the transmit completion arrives. @@ -142,6 +148,10 @@ struct xsk_tx_metadata { __u16 csum_start; /* Offset from csum_start where checksum should be stored. */ __u16 csum_offset; + + /* XDP_TXMD_FLAGS_LAUNCH_TIME */ + /* Launch time in nanosecond against the PTP HW Clock */ + __u64 launch_time; } request; struct { @@ -170,4 +180,4 @@ struct xdp_desc { /* TX packet carries valid metadata. 
*/ #define XDP_TX_METADATA (1 << 1) -#endif /* _LINUX_IF_XDP_H */ +#endif /* _UAPI_LINUX_IF_XDP_H */ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e..b6ae8ad8934b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +929,7 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h new file mode 100644 index 
000000000000..7fa67c2031a5 --- /dev/null +++ b/tools/include/uapi/linux/mount.h @@ -0,0 +1,235 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +#include <linux/types.h> + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. 
*/ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */ +#define MOVE_MOUNT__MASK 0x00000377 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. 
+ */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. 
*/ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... 
*/ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... 
*/ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ + +#endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e4be227d3ad6..7eb9571786b8 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -59,10 +59,13 @@ enum netdev_xdp_rx_metadata { * by the driver. * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the * driver. + * @NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO: Launch time HW offload is supported + * by the driver. 
*/ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + NETDEV_XSK_FLAGS_TX_LAUNCH_TIME_FIFO = 4, }; enum netdev_queue_type { @@ -87,6 +90,11 @@ enum { }; enum { + __NETDEV_A_IO_URING_PROVIDER_INFO_MAX, + NETDEV_A_IO_URING_PROVIDER_INFO_MAX = (__NETDEV_A_IO_URING_PROVIDER_INFO_MAX - 1) +}; + +enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, @@ -94,6 +102,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, NETDEV_A_PAGE_POOL_DMABUF, + NETDEV_A_PAGE_POOL_IO_URING, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,11 +140,18 @@ enum { }; enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + +enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, + NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -203,6 +219,7 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..34127653fd00 --- /dev/null +++ b/tools/include/uapi/linux/nsfs.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) +/* Returns the type of namespace (CLONE_NEW* value) referred to by + file descriptor */ +#define NS_GET_NSTYPE _IO(NSIO, 0x3) +/* Get owner UID (in the caller's user namespace) for a user 
namespace */ +#define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the caller's pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + +#endif /* __LINUX_NSFS_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3..78a362b80027 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. 
*/ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. 
*/ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << 
PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << 
PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + 
PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT 
PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort 
code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,22 +372,22 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -394,7 +397,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. 
+ * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -449,21 +452,21 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ __reserved_1 : 26; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -472,13 +475,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -508,7 +511,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. 
+ * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -535,7 +547,7 @@ struct perf_event_attr { /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -557,21 +569,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -582,7 +594,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat 
with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -620,7 +632,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -648,7 +660,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -721,7 +733,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -737,7 +749,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -811,7 +823,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. 
*/ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -822,26 +834,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -857,11 +869,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -872,7 +884,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. 
They have the following structure: * * struct { * struct perf_event_header header; @@ -882,7 +894,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -892,7 +904,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -903,7 +915,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -914,7 +926,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -925,7 +937,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -937,7 +949,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -948,7 +960,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1003,12 +1015,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1033,10 +1045,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 
regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, @@ -1068,7 +1081,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1077,12 +1090,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1165,7 +1178,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1243,181 +1256,181 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, - PERF_CONTEXT_GUEST = (__u64)-2048, - 
PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* 
snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define 
PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ 
-#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 
/* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 
26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1436,37 +1449,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation 
info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 35791791a879..43dec6eed559 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -328,4 +331,44 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. 
Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a25286441..f78ee3670dd5 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + 
+ /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write 
unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..85aa327245c6 100644 --- a/tools/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h @@ -4,6 +4,8 @@ #include <asm-generic/int-ll64.h> +#ifndef __ASSEMBLER__ + /* copied from linux:include/uapi/linux/types.h */ #define __bitwise typedef __u16 __bitwise __le16; @@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum; #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_LINUX_TYPES_H */ |