diff options
Diffstat (limited to 'tools/include')
27 files changed, 2532 insertions, 114 deletions
diff --git a/tools/include/asm-generic/io.h b/tools/include/asm-generic/io.h new file mode 100644 index 000000000000..e5a0b07ad452 --- /dev/null +++ b/tools/include/asm-generic/io.h @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_GENERIC_IO_H +#define _TOOLS_ASM_GENERIC_IO_H + +#include <asm/barrier.h> +#include <asm/byteorder.h> + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#ifndef mmiowb_set_pending +#define mmiowb_set_pending() do { } while (0) +#endif + +#ifndef __io_br +#define __io_br() barrier() +#endif + +/* prevent prefetching of coherent DMA data ahead of a dma-complete */ +#ifndef __io_ar +#ifdef rmb +#define __io_ar(v) rmb() +#else +#define __io_ar(v) barrier() +#endif +#endif + +/* flush writes to coherent DMA data before possibly triggering a DMA read */ +#ifndef __io_bw +#ifdef wmb +#define __io_bw() wmb() +#else +#define __io_bw() barrier() +#endif +#endif + +/* serialize device access against a spin_unlock, usually handled there. */ +#ifndef __io_aw +#define __io_aw() mmiowb_set_pending() +#endif + +#ifndef __io_pbw +#define __io_pbw() __io_bw() +#endif + +#ifndef __io_paw +#define __io_paw() __io_aw() +#endif + +#ifndef __io_pbr +#define __io_pbr() __io_br() +#endif + +#ifndef __io_par +#define __io_par(v) __io_ar(v) +#endif + +#ifndef _THIS_IP_ +#define _THIS_IP_ 0 +#endif + +static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_read_mmio(u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} + +/* + * __raw_{read,write}{b,w,l,q}() access memory in native endianness. + * + * On some architectures memory mapped IO needs to be accessed differently. + * On the simple architectures, we just read/write the memory location + * directly. + */ + +#ifndef __raw_readb +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} +#endif + +#ifndef __raw_readw +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return *(const volatile u16 __force *)addr; +} +#endif + +#ifndef __raw_readl +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return *(const volatile u32 __force *)addr; +} +#endif + +#ifndef __raw_readq +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return *(const volatile u64 __force *)addr; +} +#endif + +#ifndef __raw_writeb +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 value, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = value; +} +#endif + +#ifndef __raw_writew +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 value, volatile void __iomem *addr) +{ + *(volatile u16 __force *)addr = value; +} +#endif + +#ifndef __raw_writel +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 value, volatile void __iomem *addr) +{ + *(volatile u32 __force *)addr = value; +} +#endif + +#ifndef __raw_writeq +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 value, volatile void __iomem *addr) +{ + *(volatile u64 __force *)addr = value; +} +#endif + +/* + * {read,write}{b,w,l,q}() access little endian memory and return result in + * native endianness. + */ + +#ifndef readb +#define readb readb +static inline u8 readb(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __raw_readb(addr); + __io_ar(val); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw +#define readw readw +static inline u16 readw(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + __io_ar(val); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl +#define readl readl +static inline u32 readl(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + __io_ar(val); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readq +#define readq readq +static inline u64 readq(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + __io_ar(val); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb +#define writeb writeb +static inline void writeb(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeb(value, addr); + __io_aw(); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew +#define writew writew +static inline void writew(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + __io_aw(); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel +#define writel writel +static inline void writel(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + __io_aw(); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writeq +#define writeq writeq +static inline void writeq(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + __io_aw(); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}{b,w,l,q}_relaxed() are like the regular version, but + * are not guaranteed to provide ordering against spinlocks or memory + * accesses. + */ +#ifndef readb_relaxed +#define readb_relaxed readb_relaxed +static inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + val = __raw_readb(addr); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw_relaxed +#define readw_relaxed readw_relaxed +static inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl_relaxed +#define readl_relaxed readl_relaxed +static inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#if defined(readq) && !defined(readq_relaxed) +#define readq_relaxed readq_relaxed +static inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb_relaxed +#define writeb_relaxed writeb_relaxed +static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __raw_writeb(value, addr); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew_relaxed +#define writew_relaxed writew_relaxed +static inline void writew_relaxed(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel_relaxed +#define writel_relaxed writel_relaxed +static inline void writel_relaxed(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#if defined(writeq) && !defined(writeq_relaxed) +#define writeq_relaxed writeq_relaxed +static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}s{b,w,l,q}() repeatedly access the same memory address in + * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times). + */ +#ifndef readsb +#define readsb readsb +static inline void readsb(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u8 *buf = buffer; + + do { + u8 x = __raw_readb(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsw +#define readsw readsw +static inline void readsw(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u16 *buf = buffer; + + do { + u16 x = __raw_readw(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsl +#define readsl readsl +static inline void readsl(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u32 *buf = buffer; + + do { + u32 x = __raw_readl(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsq +#define readsq readsq +static inline void readsq(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u64 *buf = buffer; + + do { + u64 x = __raw_readq(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef writesb +#define writesb writesb +static inline void writesb(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u8 *buf = buffer; + + do { + __raw_writeb(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesw +#define writesw writesw +static inline void writesw(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u16 *buf = buffer; + + do { + __raw_writew(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesl +#define writesl writesl +static inline void writesl(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u32 *buf = buffer; + + do { + __raw_writel(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesq +#define writesq writesq +static inline void writesq(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u64 *buf = buffer; + + do { + __raw_writeq(*buf++, addr); + } while (--count); + } +} +#endif + +#endif /* _TOOLS_ASM_GENERIC_IO_H */ diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h new file mode 100644 index 000000000000..eed5066f25c4 --- /dev/null +++ b/tools/include/asm/io.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_IO_H +#define _TOOLS_ASM_IO_H + +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/asm/io.h" +#else +#include <asm-generic/io.h> +#endif + +#endif /* _TOOLS_ASM_IO_H */ diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 01907b33537e..50c66ba9ada5 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i); #define atomic_cmpxchg_release atomic_cmpxchg #endif /* atomic_cmpxchg_relaxed */ +static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new) +{ + int ret, old = *oldp; + + ret = atomic_cmpxchg(ptr, old, new); + if (ret != old) + *oldp = ret; + return ret == old; +} + +static inline bool atomic_inc_unless_negative(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; +} + #endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index d4d300040d01..0d992245c600 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -3,6 +3,7 @@ #define _TOOLS_LINUX_BITMAP_H #include <string.h> +#include <asm-generic/bitsperlong.h> #include <linux/align.h> #include <linux/bitops.h> #include <linux/find.h> diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h index 685f7181780f..a86af9bc8bdc 100644 --- a/tools/include/linux/cfi_types.h +++ b/tools/include/linux/cfi_types.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ #include <linux/linkage.h> -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI /* * Use the __kcfi_typeid_<function> type identifier symbol to * annotate indirectly called assembly functions. The compiler emits @@ -29,12 +29,12 @@ #define SYM_TYPED_START(name, linkage, align...) \ SYM_TYPED_ENTRY(name, linkage, align) -#else /* CONFIG_CFI_CLANG */ +#else /* CONFIG_CFI */ #define SYM_TYPED_START(name, linkage, align...) \ SYM_START(name, linkage, align) -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #ifndef SYM_TYPED_FUNC_START #define SYM_TYPED_FUNC_START(name) \ @@ -43,7 +43,7 @@ #else /* __ASSEMBLY__ */ -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI #define DEFINE_CFI_TYPE(name, func) \ /* \ * Force a reference to the function so the compiler generates \ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 33411ca0cc90..f40bd2b04c29 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -138,6 +138,10 @@ # define __force #endif +#ifndef __iomem +# define __iomem +#endif + #ifndef __weak # define __weak __attribute__((weak)) #endif diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h index 5f9f1ed190a0..65db9349f905 100644 --- a/tools/include/linux/gfp_types.h +++ b/tools/include/linux/gfp_types.h @@ -1 +1,392 @@ -#include "../../../include/linux/gfp_types.h" +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GFP_TYPES_H +#define __LINUX_GFP_TYPES_H + +#include <linux/bits.h> + +/* The typedef is in types.h but we want the documentation here */ +#if 0 +/** + * typedef gfp_t - Memory allocation flags. + * + * GFP flags are commonly used throughout Linux to indicate how memory + * should be allocated. The GFP acronym stands for get_free_pages(), + * the underlying memory allocation function. Not every GFP flag is + * supported by every function which may allocate memory. Most users + * will want to use a plain ``GFP_KERNEL``. + */ +typedef unsigned int __bitwise gfp_t; +#endif + +/* + * In case of changes, please don't forget to update + * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c + */ + +enum { + ___GFP_DMA_BIT, + ___GFP_HIGHMEM_BIT, + ___GFP_DMA32_BIT, + ___GFP_MOVABLE_BIT, + ___GFP_RECLAIMABLE_BIT, + ___GFP_HIGH_BIT, + ___GFP_IO_BIT, + ___GFP_FS_BIT, + ___GFP_ZERO_BIT, + ___GFP_UNUSED_BIT, /* 0x200u unused */ + ___GFP_DIRECT_RECLAIM_BIT, + ___GFP_KSWAPD_RECLAIM_BIT, + ___GFP_WRITE_BIT, + ___GFP_NOWARN_BIT, + ___GFP_RETRY_MAYFAIL_BIT, + ___GFP_NOFAIL_BIT, + ___GFP_NORETRY_BIT, + ___GFP_MEMALLOC_BIT, + ___GFP_COMP_BIT, + ___GFP_NOMEMALLOC_BIT, + ___GFP_HARDWALL_BIT, + ___GFP_THISNODE_BIT, + ___GFP_ACCOUNT_BIT, + ___GFP_ZEROTAGS_BIT, +#ifdef CONFIG_KASAN_HW_TAGS + ___GFP_SKIP_ZERO_BIT, + ___GFP_SKIP_KASAN_BIT, +#endif +#ifdef CONFIG_LOCKDEP + ___GFP_NOLOCKDEP_BIT, +#endif +#ifdef CONFIG_SLAB_OBJ_EXT + ___GFP_NO_OBJ_EXT_BIT, +#endif + ___GFP_LAST_BIT +}; + +/* Plain integer GFP bitmasks. Do not use this directly. */ +#define ___GFP_DMA BIT(___GFP_DMA_BIT) +#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT) +#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT) +#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT) +#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT) +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define ___GFP_IO BIT(___GFP_IO_BIT) +#define ___GFP_FS BIT(___GFP_FS_BIT) +#define ___GFP_ZERO BIT(___GFP_ZERO_BIT) +/* 0x200u unused */ +#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define ___GFP_WRITE BIT(___GFP_WRITE_BIT) +#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT) +#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT) +#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT) +#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT) +#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT) +#define ___GFP_COMP BIT(___GFP_COMP_BIT) +#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT) +#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT) +#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT) +#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT) +#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT) +#ifdef CONFIG_KASAN_HW_TAGS +#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT) +#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT) +#else +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN 0 +#endif +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT) +#else +#define ___GFP_NOLOCKDEP 0 +#endif +#ifdef CONFIG_SLAB_OBJ_EXT +#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT) +#else +#define ___GFP_NO_OBJ_EXT 0 +#endif + +/* + * Physical address zone modifiers (see linux/mmzone.h - low four bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use them consistently. The definitions here may + * be used in bit comparisons. + */ +#define __GFP_DMA ((__force gfp_t)___GFP_DMA) +#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) +#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ +#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + +/** + * DOC: Page mobility and placement hints + * + * Page mobility and placement hints + * --------------------------------- + * + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. + * + * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. + * + * %__GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). + * + * %__GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * %__GFP_THISNODE forces the allocation to be satisfied from the requested + * node with no fallbacks or placement policy enforcements. + * + * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + * + * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) +#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT) + +/** + * DOC: Watermark modifiers + * + * Watermark modifiers -- controls access to emergency reserves + * ------------------------------------------------------------ + * + * %__GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example creating an IO context to clean pages and requests + * from atomic context. + * + * %__GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * Users of this flag have to be extremely careful to not deplete the reserve + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. + * Usage of a pre-allocated pool (e.g. mempool) should be always considered + * before using this flag. + * + * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the %__GFP_MEMALLOC flag if both are set. + */ +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) + +/** + * DOC: Reclaim modifiers + * + * Reclaim modifiers + * ----------------- + * Please note that all the following flags are only applicable to sleepable + * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). + * + * %__GFP_IO can start physical IO. + * + * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * The default allocator behavior depends on the request size. We have a concept + * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules. Please note that all of them must be used along with + * %__GFP_DIRECT_RECLAIM flag. + * + * %__GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput. + * + * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made elsewhere. It can wait for other + * tasks to attempt high-level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with %__GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. + * + * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM. + * It should _never_ be used in non-sleepable contexts. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is + * not supported. Please consider using kvmalloc() instead. + */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) + +/** + * DOC: Action modifiers + * + * Action modifiers + * ---------------- + * + * %__GFP_NOWARN suppresses allocation failure reports. + * + * %__GFP_COMP address compound page metadata. + * + * %__GFP_ZERO returns a zeroed page on success. + * + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performance impact. + * + * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. + * Used for userspace and vmalloc pages; the latter are unpoisoned by + * kasan_unpoison_vmalloc instead. For userspace pages, results in + * poisoning being skipped as well, see should_skip_kasan_poison for + * details. Only effective in HW_TAGS mode. + */ +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) +#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN) + +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT ___GFP_LAST_BIT +#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) + +/** + * DOC: Useful GFP flag combinations + * + * Useful GFP flag combinations + * ---------------------------- + * + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * %__GFP_FOO flags as necessary. + * + * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves". + * The current implementation doesn't support NMI and few other strict + * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. + * + * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * + * %GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. It is very + * likely to fail to allocate memory, even for very small allocations. + * + * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. + * + * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. + * + * %GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * %GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit + * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory + * because the DMA32 kmalloc cache array is not implemented. + * (Reason: there is no such user in kernel). + * + * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. + * + * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They + * are compound allocations that will generally fail quickly if memory is not + * available and will not wake kswapd/kcompactd on failure. The _LIGHT + * version does not attempt reclaim/compaction at all and is by default used + * in page fault path, while the non-light is used by khugepaged. + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN) +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 +#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) +#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN) +#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) +#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) + +#endif /* __LINUX_GFP_TYPES_H */ diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h index e129871fe661..4b94b84160b8 100644 --- a/tools/include/linux/io.h +++ b/tools/include/linux/io.h @@ -2,4 +2,6 @@ #ifndef _TOOLS_IO_H #define _TOOLS_IO_H -#endif +#include <asm/io.h> + +#endif /* _TOOLS_IO_H */ diff --git a/tools/include/linux/pci_ids.h b/tools/include/linux/pci_ids.h new file mode 120000 index 000000000000..1c9e88f41261 --- /dev/null +++ b/tools/include/linux/pci_ids.h @@ -0,0 +1 @@ +../../../include/linux/pci_ids.h
\ No newline at end of file diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index c87051e2b26f..94937a699402 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -4,11 +4,31 @@ #include <linux/types.h> #include <linux/gfp.h> +#include <pthread.h> -#define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define kzalloc_node(size, flags, node) kmalloc(size, flags) +enum _slab_flag_bits { + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_ACCOUNT, + _SLAB_FLAGS_LAST_BIT +}; + +#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U)) + +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) +#ifdef CONFIG_MEMCG +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) +#else +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED +#endif void *kmalloc(size_t size, gfp_t gfp); void kfree(void *p); @@ -23,6 +43,98 @@ enum slab_state { FULL }; +struct kmem_cache { + pthread_mutex_t lock; + unsigned int size; + unsigned int align; + unsigned int sheaf_capacity; + int nr_objs; + void *objs; + void (*ctor)(void *); + bool non_kernel_enabled; + unsigned int non_kernel; + unsigned long nr_allocated; + unsigned long nr_tallocated; + bool exec_callback; + void (*callback)(void *); + void *private; +}; + +struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ + unsigned int align; + /** + * @sheaf_capacity: The maximum size of the sheaf. + */ + unsigned int sheaf_capacity; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ + unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ + unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ + unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ + bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ + void (*ctor)(void *); +}; + +struct slab_sheaf { + union { + struct list_head barn_list; + /* only used for prefilled sheafs */ + unsigned int capacity; + }; + struct kmem_cache *cache; + unsigned int size; + int node; /* only used for rcu_sheaf */ + void *objects[]; +}; + static inline void *kzalloc(size_t size, gfp_t gfp) { return kmalloc(size, gfp | __GFP_ZERO); @@ -37,12 +149,57 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) } void kmem_cache_free(struct kmem_cache *cachep, void *objp); -struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, - unsigned int align, unsigned int flags, - void (*ctor)(void *)); + +struct kmem_cache * +__kmem_cache_create_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags); + +/* If NULL is passed for @args, use this variant with default arguments. */ +static inline struct kmem_cache * +__kmem_cache_default_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags) +{ + struct kmem_cache_args kmem_default_args = {}; + + return __kmem_cache_create_args(name, size, &kmem_default_args, flags); +} + +static inline struct kmem_cache * +__kmem_cache_create(const char *name, unsigned int size, unsigned int align, + unsigned int flags, void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} + +#define kmem_cache_create(__name, __object_size, __args, ...) \ + _Generic((__args), \ + struct kmem_cache_args *: __kmem_cache_create_args, \ + void *: __kmem_cache_default_args, \ + default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list); int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, void **list); +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); + +void * +kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size); + +static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) +{ + return sheaf->size; +} #endif /* _TOOLS_SLAB_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index c199ade200c2..d2f5aa085f8e 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -116,6 +116,7 @@ #include "sched.h" #include "signal.h" #include "unistd.h" +#include "stdbool.h" #include "stdio.h" #include "stdlib.h" #include "string.h" diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h index 1765acb17ea0..0d053f93ea99 100644 --- a/tools/include/nolibc/poll.h +++ b/tools/include/nolibc/poll.h @@ -39,10 +39,8 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout) t.tv_nsec = (timeout % 1000) * 1000000; } return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); #else - return __nolibc_enosys(__func__, fds, nfds, timeout); + return my_syscall3(__NR_poll, fds, nfds, timeout); #endif } diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index ba950f0e7338..2c1ad23b9b5c 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -29,6 +29,6 @@ typedef unsigned long nlink_t; typedef signed long off_t; typedef signed long blksize_t; typedef signed long blkcnt_t; -typedef __kernel_old_time_t time_t; +typedef __kernel_time_t time_t; #endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 295e71d34aba..c5564f57deec 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -142,10 +142,8 @@ int sys_chmod(const char *path, mode_t mode) { #if defined(__NR_fchmodat) return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#elif defined(__NR_chmod) - return my_syscall2(__NR_chmod, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return my_syscall2(__NR_chmod, path, mode); #endif } @@ -165,10 +163,8 @@ int sys_chown(const char *path, uid_t owner, gid_t group) { #if defined(__NR_fchownat) return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#elif defined(__NR_chown) - return my_syscall3(__NR_chown, path, owner, group); #else - return __nolibc_enosys(__func__, path, owner, group); + return my_syscall3(__NR_chown, path, owner, group); #endif } @@ -238,11 +234,22 @@ static __attribute__((unused)) int sys_dup2(int old, int new) { #if defined(__NR_dup3) + int ret, nr_fcntl; + +#ifdef __NR_fcntl64 + nr_fcntl = __NR_fcntl64; +#else + nr_fcntl = __NR_fcntl; +#endif + + if (old == new) { + ret = my_syscall2(nr_fcntl, old, F_GETFD); + return ret < 0 ? ret : old; + } + return my_syscall3(__NR_dup3, old, new, 0); -#elif defined(__NR_dup2) - return my_syscall2(__NR_dup2, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_dup2, old, new); #endif } @@ -327,10 +334,8 @@ pid_t sys_fork(void) * will not use the rest with no other flag. */ return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); -#elif defined(__NR_fork) - return my_syscall0(__NR_fork); #else - return __nolibc_enosys(__func__); + return my_syscall0(__NR_fork); #endif } #endif @@ -347,7 +352,7 @@ pid_t sys_vfork(void) { #if defined(__NR_vfork) return my_syscall0(__NR_vfork); -#elif defined(__NR_clone3) +#else /* * clone() could be used but has different argument orders per * architecture. @@ -358,8 +363,6 @@ pid_t sys_vfork(void) }; return my_syscall2(__NR_clone3, &args, sizeof(args)); -#else - return __nolibc_enosys(__func__); #endif } #endif @@ -569,10 +572,8 @@ int sys_link(const char *old, const char *new) { #if defined(__NR_linkat) return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#elif defined(__NR_link) - return my_syscall2(__NR_link, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_link, old, new); #endif } @@ -593,41 +594,27 @@ off_t sys_lseek(int fd, off_t offset, int whence) #if defined(__NR_lseek) return my_syscall3(__NR_lseek, fd, offset, whence); #else - return __nolibc_enosys(__func__, fd, offset, whence); -#endif -} + __kernel_loff_t loff = 0; + off_t result; + int ret; -static __attribute__((unused)) -int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low, - __kernel_loff_t *result, int whence) -{ -#if defined(__NR_llseek) - return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence); -#else - return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence); + /* Only exists on 32bit where nolibc off_t is also 32bit */ + ret = my_syscall5(__NR_llseek, fd, 0, offset, &loff, whence); + if (ret < 0) + result = ret; + else if (loff != (off_t)loff) + result = -EOVERFLOW; + else + result = loff; + + return result; #endif } static __attribute__((unused)) off_t lseek(int fd, off_t offset, int whence) { - __kernel_loff_t loff = 0; - off_t result; - int ret; - - result = sys_lseek(fd, offset, whence); - if (result == -ENOSYS) { - /* Only exists on 32bit where nolibc off_t is also 32bit */ - ret = sys_llseek(fd, 0, offset, &loff, whence); - if (ret < 0) - result = ret; - else if (loff != (off_t)loff) - result = -EOVERFLOW; - else - result = loff; - } - - return __sysret(result); + return __sysret(sys_lseek(fd, offset, whence)); } @@ -640,10 +627,8 @@ int sys_mkdir(const char *path, mode_t mode) { #if defined(__NR_mkdirat) return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#elif defined(__NR_mkdir) - return my_syscall2(__NR_mkdir, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return my_syscall2(__NR_mkdir, path, mode); #endif } @@ -662,10 +647,8 @@ int sys_rmdir(const char *path) { #if defined(__NR_rmdir) return my_syscall1(__NR_rmdir, path); -#elif defined(__NR_unlinkat) - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #else - return __nolibc_enosys(__func__, path); + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #endif } @@ -685,10 +668,8 @@ long sys_mknod(const char *path, mode_t mode, dev_t dev) { #if defined(__NR_mknodat) return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#elif defined(__NR_mknod) - return my_syscall3(__NR_mknod, path, mode, dev); #else - return __nolibc_enosys(__func__, path, mode, dev); + return my_syscall3(__NR_mknod, path, mode, dev); #endif } @@ -801,7 +782,7 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva t.tv_nsec = timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#elif defined(__NR_pselect6_time64) +#else struct __kernel_timespec t; if (timeout) { @@ -809,8 +790,6 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva t.tv_nsec = timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#else - return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout); #endif } @@ -874,10 +853,8 @@ int sys_symlink(const char *old, const char *new) { #if defined(__NR_symlinkat) return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#elif defined(__NR_symlink) - return my_syscall2(__NR_symlink, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_symlink, old, new); #endif } @@ -931,10 +908,8 @@ int sys_unlink(const char *path) { #if defined(__NR_unlinkat) return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#elif defined(__NR_unlink) - return my_syscall1(__NR_unlink, path); #else - return __nolibc_enosys(__func__, path); + return my_syscall1(__NR_unlink, path); #endif } diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h index 8d9749f1c845..cd5d25c571a8 100644 --- a/tools/include/nolibc/sys/random.h +++ b/tools/include/nolibc/sys/random.h @@ -22,13 +22,13 @@ static __attribute__((unused)) ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags) { - return my_syscall3(__NR_getrandom, buf, buflen, flags); + return my_syscall3(__NR_getrandom, buf, buflen, flags); } static __attribute__((unused)) ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) { - return __sysret(sys_getrandom(buf, buflen, flags)); + return __sysret(sys_getrandom(buf, buflen, flags)); } #endif /* _NOLIBC_SYS_RANDOM_H */ diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h index 4375d546ba58..5dd61030c991 100644 --- a/tools/include/nolibc/sys/timerfd.h +++ b/tools/include/nolibc/sys/timerfd.h @@ -34,7 +34,7 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) { #if defined(__NR_timerfd_gettime) return my_syscall2(__NR_timerfd_gettime, fd, curr_value); -#elif defined(__NR_timerfd_gettime64) +#else struct __kernel_itimerspec kcurr_value; int ret; @@ -42,8 +42,6 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); return ret; -#else - return __nolibc_enosys(__func__, fd, curr_value); #endif } @@ -60,7 +58,7 @@ int sys_timerfd_settime(int fd, int flags, { #if defined(__NR_timerfd_settime) return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); -#elif defined(__NR_timerfd_settime64) +#else struct __kernel_itimerspec knew_value, kold_value; int ret; @@ -72,8 +70,6 @@ int sys_timerfd_settime(int fd, int flags, __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); } return ret; -#else - return __nolibc_enosys(__func__, fd, flags, new_value, old_value); #endif } diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h index 56ddb806da7f..4e66e1f7a03e 100644 --- a/tools/include/nolibc/sys/wait.h +++ b/tools/include/nolibc/sys/wait.h @@ -16,28 +16,11 @@ /* * pid_t wait(int *status); - * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); * pid_t waitpid(pid_t pid, int *status, int options); * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); */ static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ -#ifdef __NR_wait4 - return my_syscall4(__NR_wait4, pid, status, options, rusage); -#else - return __nolibc_enosys(__func__, pid, status, options, rusage); -#endif -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return __sysret(sys_wait4(pid, status, options, rusage)); -} - -static __attribute__((unused)) int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) { return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index d02bc44d2643..6c276b8d646a 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -45,7 +45,7 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res) { #if defined(__NR_clock_getres) return my_syscall2(__NR_clock_getres, clockid, res); -#elif defined(__NR_clock_getres_time64) +#else struct __kernel_timespec kres; int ret; @@ -53,8 +53,6 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res) if (res) __nolibc_timespec_kernel_to_user(&kres, res); return ret; -#else - return __nolibc_enosys(__func__, clockid, res); #endif } @@ -69,7 +67,7 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp) { #if defined(__NR_clock_gettime) return my_syscall2(__NR_clock_gettime, clockid, tp); -#elif defined(__NR_clock_gettime64) +#else struct __kernel_timespec ktp; int ret; @@ -77,8 +75,6 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp) if (tp) __nolibc_timespec_kernel_to_user(&ktp, tp); return ret; -#else - return __nolibc_enosys(__func__, clockid, tp); #endif } @@ -133,7 +129,8 @@ static __attribute__((unused)) int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, struct timespec *rmtp) { - return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp)); + /* Directly return a positive error number */ + return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp); } static __inline__ @@ -145,7 +142,7 @@ double difftime(time_t time1, time_t time2) static __inline__ int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) { - return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp); + return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp)); } diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index 25bfc7732ec7..7405fa2b89ba 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -33,7 +33,7 @@ static __attribute__((unused)) int sys_faccessat(int fd, const char *path, int amode, int flag) { - return my_syscall4(__NR_faccessat, fd, path, amode, flag); + return my_syscall4(__NR_faccessat, fd, path, amode, flag); } static __attribute__((unused)) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 233de8677382..ae83d8649ef1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1522,6 +1522,12 @@ union bpf_attr { * If provided, map_flags should have BPF_F_TOKEN_FD flag set. */ __s32 map_token_fd; + + /* Hash of the program that has exclusive access to the map. + */ + __aligned_u64 excl_prog_hash; + /* Size of the passed excl_prog_hash. */ + __u32 excl_prog_hash_size; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ @@ -1605,6 +1611,16 @@ union bpf_attr { * continuous. */ __u32 fd_array_cnt; + /* Pointer to a buffer containing the signature of the BPF + * program. + */ + __aligned_u64 signature; + /* Size of the signature buffer in bytes. */ + __u32 signature_size; + /* ID of the kernel keyring to be used for signature + * verification. + */ + __s32 keyring_id; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -6666,6 +6682,8 @@ struct bpf_map_info { __u32 btf_value_type_id; __u32 btf_vmlinux_id; __u64 map_extra; + __aligned_u64 hash; + __u32 hash_size; } __attribute__((aligned(8))); struct bpf_btf_info { @@ -7418,6 +7436,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_task_work { + __u64 __opaque; +} __attribute__((aligned(8))); + struct bpf_wq { __u64 __opaque[2]; } __attribute__((aligned(8))); diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h new file mode 100644 index 000000000000..ddba3ca01e39 --- /dev/null +++ b/tools/include/uapi/linux/genetlink.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_GENERIC_NETLINK_H +#define _UAPI__LINUX_GENERIC_NETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> + +#define GENL_NAMSIZ 16 /* length of family name */ + +#define GENL_MIN_ID NLMSG_MIN_TYPE +#define GENL_MAX_ID 1023 + +struct genlmsghdr { + __u8 cmd; + __u8 version; + __u16 reserved; +}; + +#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) + +#define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 +#define GENL_UNS_ADMIN_PERM 0x10 + +/* + * List of reserved static generic netlink identifiers: + */ +#define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) + +/************************************************************************** + * Controller + **************************************************************************/ + +enum { + CTRL_CMD_UNSPEC, + CTRL_CMD_NEWFAMILY, + CTRL_CMD_DELFAMILY, + CTRL_CMD_GETFAMILY, + CTRL_CMD_NEWOPS, + CTRL_CMD_DELOPS, + CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ + CTRL_CMD_GETPOLICY, + __CTRL_CMD_MAX, +}; + +#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) + +enum { + CTRL_ATTR_UNSPEC, + CTRL_ATTR_FAMILY_ID, + CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, + CTRL_ATTR_POLICY, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP, + __CTRL_ATTR_MAX, +}; + +#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) + +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + +enum { + CTRL_ATTR_POLICY_UNSPEC, + CTRL_ATTR_POLICY_DO, + CTRL_ATTR_POLICY_DUMP, + + __CTRL_ATTR_POLICY_DUMP_MAX, + CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 +}; + +#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1) + +#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */ diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h new file mode 100644 index 000000000000..aa7958b4e41d --- /dev/null +++ b/tools/include/uapi/linux/if_addr.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_IF_ADDR_H +#define _UAPI__LINUX_IF_ADDR_H + +#include <linux/types.h> +#include <linux/netlink.h> + +struct ifaddrmsg { + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. + * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. + */ +enum { + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + IFA_FLAGS, + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_NODAD 0x02 +#define IFA_F_OPTIMISTIC 0x04 +#define IFA_F_DADFAILED 0x08 +#define IFA_F_HOMEADDRESS 0x10 +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 + +struct ifa_cacheinfo { + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) +#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) +#endif + +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + +#endif diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h new file mode 100644 index 000000000000..c34a81245f87 --- /dev/null +++ b/tools/include/uapi/linux/neighbour.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NEIGHBOUR_H +#define _UAPI__LINUX_NEIGHBOUR_H + +#include <linux/types.h> +#include <linux/netlink.h> + +struct ndmsg { + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum { + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + NDA_VLAN, + NDA_PORT, + NDA_VNI, + NDA_IFINDEX, + NDA_MASTER, + NDA_LINK_NETNSID, + NDA_SRC_VNI, + NDA_PROTOCOL, /* Originator of entry */ + NDA_NH_ID, + NDA_FDB_EXT_ATTRS, + NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_USE (1 << 0) +#define NTF_SELF (1 << 1) +#define NTF_MASTER (1 << 2) +#define NTF_PROXY (1 << 3) /* == ATF_PUBL */ +#define NTF_EXT_LEARNED (1 << 4) +#define NTF_OFFLOADED (1 << 5) +#define NTF_STICKY (1 << 6) +#define NTF_ROUTER (1 << 7) +/* Extended flags under NDA_FLAGS_EXT: */ +#define NTF_EXT_MANAGED (1 << 0) +#define NTF_EXT_LOCKED (1 << 1) +#define NTF_EXT_EXT_VALIDATED (1 << 2) + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no + * address resolution or NUD. + * + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED + * flagged entries explicitly are (which is also consistent with the routing + * subsystem). + * + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. + * + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf + * of a user space control plane, and automatically refreshed so that (if + * possible) they remain in NUD_REACHABLE state. + * + * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the + * bridge in response to a host trying to communicate via a locked bridge port + * with MAB enabled. Their purpose is to notify user space that a host requires + * authentication. + * + * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by + * a user space control plane. The kernel will not remove or invalidate them, + * but it can probe them and notify user space when they become reachable. + */ + +struct nda_cacheinfo { + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats { + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; + __u64 ndts_table_fulls; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + NDTPA_QUEUE_LENBYTES, /* u32 */ + NDTPA_MCAST_REPROBES, /* u32 */ + NDTPA_PAD, + NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg { + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config { + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + NDTA_PAD, + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + + /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY: + * - FDB_NOTIFY_BIT - notify on activity/expire for any entry + * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications + */ +enum { + FDB_NOTIFY_BIT = (1 << 0), + FDB_NOTIFY_INACTIVE_BIT = (1 << 1) +}; + +/* embedded into NDA_FDB_EXT_ATTRS: + * [NDA_FDB_EXT_ATTRS] = { + * [NFEA_ACTIVITY_NOTIFY] + * ... + * } + */ +enum { + NFEA_UNSPEC, + NFEA_ACTIVITY_NOTIFY, + NFEA_DONT_REFRESH, + __NFEA_MAX +}; +#define NFEA_MAX (__NFEA_MAX - 1) + +#endif diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h new file mode 100644 index 000000000000..5a79ccb76701 --- /dev/null +++ b/tools/include/uapi/linux/netfilter.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETFILTER_H +#define _UAPI__LINUX_NETFILTER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/in.h> +#include <linux/in6.h> + +/* Responses from hook functions. */ +#define NF_DROP 0 +#define NF_ACCEPT 1 +#define NF_STOLEN 2 +#define NF_QUEUE 3 +#define NF_REPEAT 4 +#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */ +#define NF_MAX_VERDICT NF_STOP + +/* we overload the higher bits for encoding auxiliary data such as the queue + * number or errno values. Not nice, but better than additional function + * arguments. */ +#define NF_VERDICT_MASK 0x000000ff + +/* extra verdict flags have mask 0x0000ff00 */ +#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 + +/* queue number (NF_QUEUE) or errno (NF_DROP) */ +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) + +#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) + +/* only for userspace compatibility */ +#ifndef __KERNEL__ + +/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ +#define NF_VERDICT_BITS 16 +#endif + +enum nf_inet_hooks { + NF_INET_PRE_ROUTING, + NF_INET_LOCAL_IN, + NF_INET_FORWARD, + NF_INET_LOCAL_OUT, + NF_INET_POST_ROUTING, + NF_INET_NUMHOOKS, + NF_INET_INGRESS = NF_INET_NUMHOOKS, +}; + +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_EGRESS, + NF_NETDEV_NUMHOOKS +}; + +enum { + NFPROTO_UNSPEC = 0, + NFPROTO_INET = 1, + NFPROTO_IPV4 = 2, + NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, + NFPROTO_BRIDGE = 7, + NFPROTO_IPV6 = 10, +#ifndef __KERNEL__ /* no longer supported by kernel */ + NFPROTO_DECNET = 12, +#endif + NFPROTO_NUMPROTO, +}; + +union nf_inet_addr { + __u32 all[4]; + __be32 ip; + __be32 ip6[4]; + struct in_addr in; + struct in6_addr in6; +}; + +#endif /* _UAPI__LINUX_NETFILTER_H */ diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h new file mode 100644 index 000000000000..791dfc5ae907 --- /dev/null +++ b/tools/include/uapi/linux/netfilter_arp.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ +#ifndef __LINUX_ARP_NETFILTER_H +#define __LINUX_ARP_NETFILTER_H + +/* ARP-specific defines for netfilter. + * (C)2002 Rusty Russell IBM -- This code is GPL. + */ + +#include <linux/netfilter.h> + +/* There is no PF_ARP. */ +#define NF_ARP 0 + +/* ARP Hooks */ +#define NF_ARP_IN 0 +#define NF_ARP_OUT 1 +#define NF_ARP_FORWARD 2 + +#ifndef __KERNEL__ +#define NF_ARP_NUMHOOKS 3 +#endif + +#endif /* __LINUX_ARP_NETFILTER_H */ diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h index 34127653fd00..33c9b578b3b2 100644 --- a/tools/include/uapi/linux/nsfs.h +++ b/tools/include/uapi/linux/nsfs.h @@ -16,8 +16,6 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) -/* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ @@ -42,4 +40,19 @@ struct mnt_ns_info { /* Get previous namespace. */ #define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) +/* Retrieve namespace identifiers. */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64) +#define NS_GET_ID _IOR(NSIO, 13, __u64) + +enum init_ns_ino { + IPC_NS_INIT_INO = 0xEFFFFFFFU, + UTS_NS_INIT_INO = 0xEFFFFFFEU, + USER_NS_INIT_INO = 0xEFFFFFFDU, + PID_NS_INIT_INO = 0xEFFFFFFCU, + CGROUP_NS_INIT_INO = 0xEFFFFFFBU, + TIME_NS_INIT_INO = 0xEFFFFFFAU, + NET_NS_INIT_INO = 0xEFFFFFF9U, + MNT_NS_INIT_INO = 0xEFFFFFF8U, +}; + #endif /* __LINUX_NSFS_H */ diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h new file mode 100644 index 000000000000..dab9493c791b --- /dev/null +++ b/tools/include/uapi/linux/rtnetlink.h @@ -0,0 +1,848 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_RTNETLINK_H +#define _UAPI__LINUX_RTNETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/if_link.h> +#include <linux/if_addr.h> +#include <linux/neighbour.h> + +/* rtnetlink families. Values up to 127 are reserved for real address + * families, values above 128 may be used arbitrarily. + */ +#define RTNL_FAMILY_IPMR 128 +#define RTNL_FAMILY_IP6MR 129 +#define RTNL_FAMILY_MAX 129 + +/**** + * Routing/neighbour discovery messages. + ****/ + +/* Types of messages */ + +enum { + RTM_BASE = 16, +#define RTM_BASE RTM_BASE + + RTM_NEWLINK = 16, +#define RTM_NEWLINK RTM_NEWLINK + RTM_DELLINK, +#define RTM_DELLINK RTM_DELLINK + RTM_GETLINK, +#define RTM_GETLINK RTM_GETLINK + RTM_SETLINK, +#define RTM_SETLINK RTM_SETLINK + + RTM_NEWADDR = 20, +#define RTM_NEWADDR RTM_NEWADDR + RTM_DELADDR, +#define RTM_DELADDR RTM_DELADDR + RTM_GETADDR, +#define RTM_GETADDR RTM_GETADDR + + RTM_NEWROUTE = 24, +#define RTM_NEWROUTE RTM_NEWROUTE + RTM_DELROUTE, +#define RTM_DELROUTE RTM_DELROUTE + RTM_GETROUTE, +#define RTM_GETROUTE RTM_GETROUTE + + RTM_NEWNEIGH = 28, +#define RTM_NEWNEIGH RTM_NEWNEIGH + RTM_DELNEIGH, +#define RTM_DELNEIGH RTM_DELNEIGH + RTM_GETNEIGH, +#define RTM_GETNEIGH RTM_GETNEIGH + + RTM_NEWRULE = 32, +#define RTM_NEWRULE RTM_NEWRULE + RTM_DELRULE, +#define RTM_DELRULE RTM_DELRULE + RTM_GETRULE, +#define RTM_GETRULE RTM_GETRULE + + RTM_NEWQDISC = 36, +#define RTM_NEWQDISC RTM_NEWQDISC + RTM_DELQDISC, +#define RTM_DELQDISC RTM_DELQDISC + RTM_GETQDISC, +#define RTM_GETQDISC RTM_GETQDISC + + RTM_NEWTCLASS = 40, +#define RTM_NEWTCLASS RTM_NEWTCLASS + RTM_DELTCLASS, +#define RTM_DELTCLASS RTM_DELTCLASS + RTM_GETTCLASS, +#define RTM_GETTCLASS RTM_GETTCLASS + + RTM_NEWTFILTER = 44, +#define RTM_NEWTFILTER RTM_NEWTFILTER + RTM_DELTFILTER, +#define RTM_DELTFILTER RTM_DELTFILTER + RTM_GETTFILTER, +#define RTM_GETTFILTER RTM_GETTFILTER + + RTM_NEWACTION = 48, +#define RTM_NEWACTION RTM_NEWACTION + RTM_DELACTION, +#define RTM_DELACTION RTM_DELACTION + RTM_GETACTION, +#define RTM_GETACTION RTM_GETACTION + + RTM_NEWPREFIX = 52, +#define RTM_NEWPREFIX RTM_NEWPREFIX + + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, +#define RTM_GETMULTICAST RTM_GETMULTICAST + + RTM_NEWANYCAST = 60, +#define RTM_NEWANYCAST RTM_NEWANYCAST + RTM_DELANYCAST, +#define RTM_DELANYCAST RTM_DELANYCAST + RTM_GETANYCAST, +#define RTM_GETANYCAST RTM_GETANYCAST + + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + RTM_NEWNDUSEROPT = 68, +#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT + + RTM_NEWADDRLABEL = 72, +#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL + RTM_DELADDRLABEL, +#define RTM_DELADDRLABEL RTM_DELADDRLABEL + RTM_GETADDRLABEL, +#define RTM_GETADDRLABEL RTM_GETADDRLABEL + + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_DELNSID = 89, +#define RTM_DELNSID RTM_DELNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + + RTM_NEWSTATS = 92, +#define RTM_NEWSTATS RTM_NEWSTATS + RTM_GETSTATS = 94, +#define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS + + RTM_NEWCACHEREPORT = 96, +#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT + + RTM_NEWCHAIN = 100, +#define RTM_NEWCHAIN RTM_NEWCHAIN + RTM_DELCHAIN, +#define RTM_DELCHAIN RTM_DELCHAIN + RTM_GETCHAIN, +#define RTM_GETCHAIN RTM_GETCHAIN + + RTM_NEWNEXTHOP = 104, +#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP + RTM_DELNEXTHOP, +#define RTM_DELNEXTHOP RTM_DELNEXTHOP + RTM_GETNEXTHOP, +#define RTM_GETNEXTHOP RTM_GETNEXTHOP + + RTM_NEWLINKPROP = 108, +#define RTM_NEWLINKPROP RTM_NEWLINKPROP + RTM_DELLINKPROP, +#define RTM_DELLINKPROP RTM_DELLINKPROP + RTM_GETLINKPROP, +#define RTM_GETLINKPROP RTM_GETLINKPROP + + RTM_NEWVLAN = 112, +#define RTM_NEWVLAN RTM_NEWVLAN + RTM_DELVLAN, +#define RTM_DELVLAN RTM_DELVLAN + RTM_GETVLAN, +#define RTM_GETVLAN RTM_GETVLAN + + RTM_NEWNEXTHOPBUCKET = 116, +#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET + RTM_DELNEXTHOPBUCKET, +#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET + RTM_GETNEXTHOPBUCKET, +#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) +}; + +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + +/* + Generic structure for encapsulation of optional route information. + It is reminiscent of sockaddr, but with sa_family replaced + with attribute type. + */ + +struct rtattr { + unsigned short rta_len; + unsigned short rta_type; +}; + +/* Macros to handle rtattributes */ + +#define RTA_ALIGNTO 4U +#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) +#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \ + (rta)->rta_len >= sizeof(struct rtattr) && \ + (rta)->rta_len <= (len)) +#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ + (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) +#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) +#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len)) +#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0))) +#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + + + + +/****************************************************************************** + * Definitions used in routing table administration. + ****/ + +struct rtmsg { + unsigned char rtm_family; + unsigned char rtm_dst_len; + unsigned char rtm_src_len; + unsigned char rtm_tos; + + unsigned char rtm_table; /* Routing table id */ + unsigned char rtm_protocol; /* Routing protocol; see below */ + unsigned char rtm_scope; /* See below */ + unsigned char rtm_type; /* See below */ + + unsigned rtm_flags; +}; + +/* rtm_type */ + +enum { + RTN_UNSPEC, + RTN_UNICAST, /* Gateway or direct route */ + RTN_LOCAL, /* Accept locally */ + RTN_BROADCAST, /* Accept locally as broadcast, + send as broadcast */ + RTN_ANYCAST, /* Accept locally as broadcast, + but send as unicast */ + RTN_MULTICAST, /* Multicast route */ + RTN_BLACKHOLE, /* Drop */ + RTN_UNREACHABLE, /* Destination is unreachable */ + RTN_PROHIBIT, /* Administratively prohibited */ + RTN_THROW, /* Not in this table */ + RTN_NAT, /* Translate this address */ + RTN_XRESOLVE, /* Use external resolver */ + __RTN_MAX +}; + +#define RTN_MAX (__RTN_MAX - 1) + + +/* rtm_protocol */ + +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ + +/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; + they are just passed from user and back as is. + It will be used by hypothetical multiple routing daemons. + Note that protocol values should be standardized in order to + avoid conflicts. + */ + +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_OVN 84 /* OVN daemon */ +#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ + +/* rtm_scope + + Really it is not scope, but sort of distance to the destination. + NOWHERE are reserved for not existing destinations, HOST is our + local addresses, LINK are destinations, located on directly attached + link and UNIVERSE is everywhere in the Universe. + + Intermediate values are also possible f.e. interior routes + could be assigned a value between UNIVERSE and LINK. +*/ + +enum rt_scope_t { + RT_SCOPE_UNIVERSE=0, +/* User defined values */ + RT_SCOPE_SITE=200, + RT_SCOPE_LINK=253, + RT_SCOPE_HOST=254, + RT_SCOPE_NOWHERE=255 +}; + +/* rtm_flags */ + +#define RTM_F_NOTIFY 0x100 /* Notify user of route change */ +#define RTM_F_CLONED 0x200 /* This route is cloned */ +#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ +#define RTM_F_PREFIX 0x800 /* Prefix addresses */ +#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */ +#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ +#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */ +#define RTM_F_TRAP 0x8000 /* route is trapping packets */ +#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value + * is chosen to avoid conflicts with + * other flags defined in + * include/uapi/linux/ipv6_route.h + */ + +/* Reserved table identifiers */ + +enum rt_class_t { + RT_TABLE_UNSPEC=0, +/* User defined values */ + RT_TABLE_COMPAT=252, + RT_TABLE_DEFAULT=253, + RT_TABLE_MAIN=254, + RT_TABLE_LOCAL=255, + RT_TABLE_MAX=0xFFFFFFFF +}; + + +/* Routing message attributes */ + +enum rtattr_type_t { + RTA_UNSPEC, + RTA_DST, + RTA_SRC, + RTA_IIF, + RTA_OIF, + RTA_GATEWAY, + RTA_PRIORITY, + RTA_PREFSRC, + RTA_METRICS, + RTA_MULTIPATH, + RTA_PROTOINFO, /* no longer used */ + RTA_FLOW, + RTA_CACHEINFO, + RTA_SESSION, /* no longer used */ + RTA_MP_ALGO, /* no longer used */ + RTA_TABLE, + RTA_MARK, + RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, + RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, + RTA_EXPIRES, + RTA_PAD, + RTA_UID, + RTA_TTL_PROPAGATE, + RTA_IP_PROTO, + RTA_SPORT, + RTA_DPORT, + RTA_NH_ID, + RTA_FLOWLABEL, + __RTA_MAX +}; + +#define RTA_MAX (__RTA_MAX - 1) + +#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)))) +#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg)) + +/* RTM_MULTIPATH --- array of struct rtnexthop. + * + * "struct rtnexthop" describes all necessary nexthop information, + * i.e. parameters of path to a destination via this nexthop. + * + * At the moment it is impossible to set different prefsrc, mtu, window + * and rtt for different paths from multipath. + */ + +struct rtnexthop { + unsigned short rtnh_len; + unsigned char rtnh_flags; + unsigned char rtnh_hops; + int rtnh_ifindex; +}; + +/* rtnh_flags */ + +#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ +#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ +#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ +#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ +#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ + +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) + +/* Macros to handle hexthops */ + +#define RTNH_ALIGNTO 4 +#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) ) +#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \ + ((int)(rtnh)->rtnh_len) <= (len)) +#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len))) +#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len)) +#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) +#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) + +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[]; +}; + +/* RTM_CACHEINFO */ + +struct rta_cacheinfo { + __u32 rta_clntref; + __u32 rta_lastuse; + __s32 rta_expires; + __u32 rta_error; + __u32 rta_used; + +#define RTNETLINK_HAVE_PEERINFO 1 + __u32 rta_id; + __u32 rta_ts; + __u32 rta_tsage; +}; + +/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ + +enum { + RTAX_UNSPEC, +#define RTAX_UNSPEC RTAX_UNSPEC + RTAX_LOCK, +#define RTAX_LOCK RTAX_LOCK + RTAX_MTU, +#define RTAX_MTU RTAX_MTU + RTAX_WINDOW, +#define RTAX_WINDOW RTAX_WINDOW + RTAX_RTT, +#define RTAX_RTT RTAX_RTT + RTAX_RTTVAR, +#define RTAX_RTTVAR RTAX_RTTVAR + RTAX_SSTHRESH, +#define RTAX_SSTHRESH RTAX_SSTHRESH + RTAX_CWND, +#define RTAX_CWND RTAX_CWND + RTAX_ADVMSS, +#define RTAX_ADVMSS RTAX_ADVMSS + RTAX_REORDERING, +#define RTAX_REORDERING RTAX_REORDERING + RTAX_HOPLIMIT, +#define RTAX_HOPLIMIT RTAX_HOPLIMIT + RTAX_INITCWND, +#define RTAX_INITCWND RTAX_INITCWND + RTAX_FEATURES, +#define RTAX_FEATURES RTAX_FEATURES + RTAX_RTO_MIN, +#define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND + RTAX_QUICKACK, +#define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO + RTAX_FASTOPEN_NO_COOKIE, +#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE + __RTAX_MAX +}; + +#define RTAX_MAX (__RTAX_MAX - 1) + +#define RTAX_FEATURE_ECN (1 << 0) +#define RTAX_FEATURE_SACK (1 << 1) /* unused */ +#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ +#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ +#define RTAX_FEATURE_TCP_USEC_TS (1 << 4) + +#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ + RTAX_FEATURE_SACK | \ + RTAX_FEATURE_TIMESTAMP | \ + RTAX_FEATURE_ALLFRAG | \ + RTAX_FEATURE_TCP_USEC_TS) + +struct rta_session { + __u8 proto; + __u8 pad1; + __u16 pad2; + + union { + struct { + __u16 sport; + __u16 dport; + } ports; + + struct { + __u8 type; + __u8 code; + __u16 ident; + } icmpt; + + __u32 spi; + } u; +}; + +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + +/**** + * General form of address family dependent message. + ****/ + +struct rtgenmsg { + unsigned char rtgen_family; +}; + +/***************************************************************** + * Link layer specific messages. + ****/ + +/* struct ifinfomsg + * passes link level specific information, not dependent + * on network protocol. + */ + +struct ifinfomsg { + unsigned char ifi_family; + unsigned char __ifi_pad; + unsigned short ifi_type; /* ARPHRD_* */ + int ifi_index; /* Link index */ + unsigned ifi_flags; /* IFF_* flags */ + unsigned ifi_change; /* IFF_* change mask */ +}; + +/******************************************************************** + * prefix information + ****/ + +struct prefixmsg { + unsigned char prefix_family; + unsigned char prefix_pad1; + unsigned short prefix_pad2; + int prefix_ifindex; + unsigned char prefix_type; + unsigned char prefix_len; + unsigned char prefix_flags; + unsigned char prefix_pad3; +}; + +enum +{ + PREFIX_UNSPEC, + PREFIX_ADDRESS, + PREFIX_CACHEINFO, + __PREFIX_MAX +}; + +#define PREFIX_MAX (__PREFIX_MAX - 1) + +struct prefix_cacheinfo { + __u32 preferred_time; + __u32 valid_time; +}; + + +/***************************************************************** + * Traffic control messages. + ****/ + +struct tcmsg { + unsigned char tcm_family; + unsigned char tcm__pad1; + unsigned short tcm__pad2; + int tcm_ifindex; + __u32 tcm_handle; + __u32 tcm_parent; +/* tcm_block_index is used instead of tcm_parent + * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK + */ +#define tcm_block_index tcm_parent + __u32 tcm_info; +}; + +/* For manipulation of filters in shared block, tcm_ifindex is set to + * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index + * which is the block index. + */ +#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) + +enum { + TCA_UNSPEC, + TCA_KIND, + TCA_OPTIONS, + TCA_STATS, + TCA_XSTATS, + TCA_RATE, + TCA_FCNT, + TCA_STATS2, + TCA_STAB, + TCA_PAD, + TCA_DUMP_INVISIBLE, + TCA_CHAIN, + TCA_HW_OFFLOAD, + TCA_INGRESS_BLOCK, + TCA_EGRESS_BLOCK, + TCA_DUMP_FLAGS, + TCA_EXT_WARN_MSG, + __TCA_MAX +}; + +#define TCA_MAX (__TCA_MAX - 1) + +#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic + * data necessary to identify the objects + * (handle, cookie, etc.) and stats. + */ + +#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) +#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) + +/******************************************************************** + * Neighbor Discovery userland options + ****/ + +struct nduseroptmsg { + unsigned char nduseropt_family; + unsigned char nduseropt_pad1; + unsigned short nduseropt_opts_len; /* Total length of options */ + int nduseropt_ifindex; + __u8 nduseropt_icmp_type; + __u8 nduseropt_icmp_code; + unsigned short nduseropt_pad2; + unsigned int nduseropt_pad3; + /* Followed by one or more ND options */ +}; + +enum { + NDUSEROPT_UNSPEC, + NDUSEROPT_SRCADDR, + __NDUSEROPT_MAX +}; + +#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1) + +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ +#define RTMGRP_LINK 1 +#define RTMGRP_NOTIFY 2 +#define RTMGRP_NEIGH 4 +#define RTMGRP_TC 8 + +#define RTMGRP_IPV4_IFADDR 0x10 +#define RTMGRP_IPV4_MROUTE 0x20 +#define RTMGRP_IPV4_ROUTE 0x40 +#define RTMGRP_IPV4_RULE 0x80 + +#define RTMGRP_IPV6_IFADDR 0x100 +#define RTMGRP_IPV6_MROUTE 0x200 +#define RTMGRP_IPV6_ROUTE 0x400 +#define RTMGRP_IPV6_IFINFO 0x800 + +#define RTMGRP_DECnet_IFADDR 0x1000 +#define RTMGRP_DECnet_ROUTE 0x4000 + +#define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV4_RULE, +#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_NOP2, + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE + RTNLGRP_NOP4, + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE + RTNLGRP_ND_USEROPT, +#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE + RTNLGRP_DCB, +#define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE + RTNLGRP_NSID, +#define RTNLGRP_NSID RTNLGRP_NSID + RTNLGRP_MPLS_NETCONF, +#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF + RTNLGRP_IPV4_MROUTE_R, +#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R + RTNLGRP_IPV6_MROUTE_R, +#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R + RTNLGRP_NEXTHOP, +#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP + RTNLGRP_BRVLAN, +#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR + RTNLGRP_IPV6_ACADDR, +#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) + +/* TC action piece */ +struct tcamsg { + unsigned char tca_family; + unsigned char tca__pad1; + unsigned short tca__pad2; +}; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ + TCA_ROOT_EXT_WARN_MSG, + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + +#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) +#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than + * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the + * number of actions being dumped stored in for user app's consumption in + * TCA_ROOT_COUNT + * + * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only + * includes essential action info (kind, index, etc.) + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) +#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON +#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1) + +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) +#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) +#define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) +#define RTEXT_FILTER_CFM_CONFIG (1 << 5) +#define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) + +/* End of information exported to user level */ + + + +#endif /* _UAPI__LINUX_RTNETLINK_H */ |