diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-26 21:02:05 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-26 21:02:05 -0700 |
commit | 592329e5e94e26080f4815c6cc6cd0f487a91064 (patch) | |
tree | 5c2210b93cdfdcec8382aacb0492a5ff827ca930 /mm | |
parent | 336b4dae6dfecc9aa53a3a68c71b9c1c1d466388 (diff) | |
parent | 29fa7d7934216e0a93102a930ef28e2a6ae852b1 (diff) |
Merge tag 'sysctl-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl
Pull sysctl updates from Joel Granados:
- Move vm_table members out of kernel/sysctl.c
All vm_table array members have moved to their respective subsystems
leading to the removal of vm_table from kernel/sysctl.c. This
increases modularity by placing the ctl_tables closer to where they
are actually used and at the same time reducing the chances of merge
conflicts in kernel/sysctl.c.
- ctl_table range fixes
Replace the proc_handler function that checks variable ranges in
coredump_sysctls and vdso_table with the one that actually uses the
extra{1,2} pointers as min/max values. This tightens the range of the
values that users can pass into the kernel effectively preventing
{under,over}flows.
- Misc fixes
Correct grammar errors and typos in test messages. Update sysctl
files in MAINTAINERS. Constified and removed array size in
declaration for alignment_tbl
* tag 'sysctl-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl: (22 commits)
selftests/sysctl: fix wording of help messages
selftests: fix spelling/grammar errors in sysctl/sysctl.sh
MAINTAINERS: Update sysctl file list in MAINTAINERS
sysctl: Fix underflow value setting risk in vm_table
coredump: Fixes core_pipe_limit sysctl proc_handler
sysctl: remove unneeded include
sysctl: remove the vm_table
sh: vdso: move the sysctl to arch/sh/kernel/vsyscall/vsyscall.c
x86: vdso: move the sysctl to arch/x86/entry/vdso/vdso32-setup.c
fs: dcache: move the sysctl to fs/dcache.c
sunrpc: simplify rpcauth_cache_shrink_count()
fs: drop_caches: move sysctl to fs/drop_caches.c
fs: fs-writeback: move sysctl to fs/fs-writeback.c
mm: nommu: move sysctl to mm/nommu.c
security: min_addr: move sysctl to security/min_addr.c
mm: mmap: move sysctl to mm/mmap.c
mm: util: move sysctls to mm/util.c
mm: vmscan: move vmscan sysctls to mm/vmscan.c
mm: swap: move sysctl to mm/swap.c
mm: filemap: move sysctl to mm/filemap.c
...
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 18 | ||||
-rw-r--r-- | mm/internal.h | 10 | ||||
-rw-r--r-- | mm/mmap.c | 54 | ||||
-rw-r--r-- | mm/nommu.c | 15 | ||||
-rw-r--r-- | mm/swap.c | 16 | ||||
-rw-r--r-- | mm/swap.h | 1 | ||||
-rw-r--r-- | mm/util.c | 67 | ||||
-rw-r--r-- | mm/vmscan.c | 23 | ||||
-rw-r--r-- | mm/vmstat.c | 44 |
9 files changed, 231 insertions, 17 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index e9404290f2c6..27dbfc1193b4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -47,6 +47,7 @@ #include <linux/splice.h> #include <linux/rcupdate_wait.h> #include <linux/sched/mm.h> +#include <linux/sysctl.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include "internal.h" @@ -1077,6 +1078,19 @@ static wait_queue_head_t *folio_waitqueue(struct folio *folio) return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)]; } +/* How many times do we accept lock stealing from under a waiter? */ +static int sysctl_page_lock_unfairness = 5; +static const struct ctl_table filemap_sysctl_table[] = { + { + .procname = "page_lock_unfairness", + .data = &sysctl_page_lock_unfairness, + .maxlen = sizeof(sysctl_page_lock_unfairness), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + } +}; + void __init pagecache_init(void) { int i; @@ -1085,6 +1099,7 @@ void __init pagecache_init(void) init_waitqueue_head(&folio_wait_table[i]); page_writeback_init(); + register_sysctl_init("vm", filemap_sysctl_table); } /* @@ -1232,9 +1247,6 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr, return true; } -/* How many times do we accept lock stealing from under a waiter? */ -int sysctl_page_lock_unfairness = 5; - static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, int state, enum behavior behavior) { diff --git a/mm/internal.h b/mm/internal.h index 20b3535935a3..7eb27ab83ed7 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1097,9 +1097,13 @@ static inline void mminit_verify_zonelist(void) #define NODE_RECLAIM_SUCCESS 1 #ifdef CONFIG_NUMA +extern int node_reclaim_mode; + extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); extern int find_next_best_node(int node, nodemask_t *used_node_mask); #else +#define node_reclaim_mode 0 + static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, unsigned int order) { @@ -1111,6 +1115,12 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask) } #endif +static inline bool node_reclaim_enabled(void) +{ + /* Is any node_reclaim_mode bit set? */ + return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); +} + /* * mm/memory-failure.c */ diff --git a/mm/mmap.c b/mm/mmap.c index cda01071c7b1..d6bbe435bd99 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1543,6 +1543,57 @@ struct vm_area_struct *_install_special_mapping( &special_mapping_vmops); } +#ifdef CONFIG_SYSCTL +#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ + defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) +int sysctl_legacy_va_layout; +#endif + +static const struct ctl_table mmap_table[] = { + { + .procname = "max_map_count", + .data = &sysctl_max_map_count, + .maxlen = sizeof(sysctl_max_map_count), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, +#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ + defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) + { + .procname = "legacy_va_layout", + .data = &sysctl_legacy_va_layout, + .maxlen = sizeof(sysctl_legacy_va_layout), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, +#endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS + { + .procname = "mmap_rnd_bits", + .data = &mmap_rnd_bits, + .maxlen = sizeof(mmap_rnd_bits), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&mmap_rnd_bits_min, + .extra2 = (void *)&mmap_rnd_bits_max, + }, +#endif +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS + { + .procname = "mmap_rnd_compat_bits", + .data = &mmap_rnd_compat_bits, + .maxlen = sizeof(mmap_rnd_compat_bits), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&mmap_rnd_compat_bits_min, + .extra2 = (void *)&mmap_rnd_compat_bits_max, + }, +#endif +}; +#endif /* CONFIG_SYSCTL */ + /* * initialise the percpu counter for VM */ @@ -1552,6 +1603,9 @@ void __init mmap_init(void) ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); +#ifdef CONFIG_SYSCTL + register_sysctl_init("vm", mmap_table); +#endif } /* diff --git a/mm/nommu.c b/mm/nommu.c index 9cb6e99215e2..acc18ce611ed 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -48,7 +48,6 @@ struct page *mem_map; unsigned long max_mapnr; EXPORT_SYMBOL(max_mapnr); unsigned long highest_memmap_pfn; -int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; atomic_long_t mmap_pages_allocated; @@ -392,6 +391,19 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) return mm->brk = brk; } +static int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; + +static const struct ctl_table nommu_table[] = { + { + .procname = "nr_trim_pages", + .data = &sysctl_nr_trim_pages, + .maxlen = sizeof(sysctl_nr_trim_pages), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, +}; + /* * initialise the percpu counter for VM and region record slabs */ @@ -402,6 +414,7 @@ void __init mmap_init(void) ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT); + register_sysctl_init("vm", nommu_table); } /* diff --git a/mm/swap.c b/mm/swap.c index fc8281ef4241..b81cce146eb2 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -45,7 +45,7 @@ /* How many pages do we try to swap or page in/out together? As a power of 2 */ int page_cluster; -const int page_cluster_max = 31; +static const int page_cluster_max = 31; struct cpu_fbatches { /* @@ -1076,6 +1076,18 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch) fbatch->nr = j; } +static const struct ctl_table swap_sysctl_table[] = { + { + .procname = "page-cluster", + .data = &page_cluster, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = (void *)&page_cluster_max, + } +}; + /* * Perform any setup for the swap system */ @@ -1092,4 +1104,6 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ + + register_sysctl_init("vm", swap_sysctl_table); } diff --git a/mm/swap.h b/mm/swap.h index ad2f121de970..274dcc6219a0 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -3,6 +3,7 @@ #define _MM_SWAP_H struct mempolicy; +extern int page_cluster; #ifdef CONFIG_SWAP #include <linux/swapops.h> /* for swp_offset */ diff --git a/mm/util.c b/mm/util.c index e7d81371032b..448117da071f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -12,6 +12,7 @@ #include <linux/security.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/sysctl.h> #include <linux/mman.h> #include <linux/hugetlb.h> #include <linux/vmalloc.h> @@ -747,14 +748,16 @@ int folio_mc_copy(struct folio *dst, struct folio *src) EXPORT_SYMBOL(folio_mc_copy); int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; -int sysctl_overcommit_ratio __read_mostly = 50; -unsigned long sysctl_overcommit_kbytes __read_mostly; +static int sysctl_overcommit_ratio __read_mostly = 50; +static unsigned long sysctl_overcommit_kbytes __read_mostly; int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ -int overcommit_ratio_handler(const struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) +#ifdef CONFIG_SYSCTL + +static int overcommit_ratio_handler(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -769,8 +772,8 @@ static void sync_overcommit_as(struct work_struct *dummy) percpu_counter_sync(&vm_committed_as); } -int overcommit_policy_handler(const struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) +static int overcommit_policy_handler(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int new_policy = -1; @@ -805,8 +808,8 @@ int overcommit_policy_handler(const struct ctl_table *table, int write, void *bu return ret; } -int overcommit_kbytes_handler(const struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) +static int overcommit_kbytes_handler(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -816,6 +819,54 @@ int overcommit_kbytes_handler(const struct ctl_table *table, int write, void *bu return ret; } +static const struct ctl_table util_sysctl_table[] = { + { + .procname = "overcommit_memory", + .data = &sysctl_overcommit_memory, + .maxlen = sizeof(sysctl_overcommit_memory), + .mode = 0644, + .proc_handler = overcommit_policy_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { + .procname = "overcommit_ratio", + .data = &sysctl_overcommit_ratio, + .maxlen = sizeof(sysctl_overcommit_ratio), + .mode = 0644, + .proc_handler = overcommit_ratio_handler, + }, + { + .procname = "overcommit_kbytes", + .data = &sysctl_overcommit_kbytes, + .maxlen = sizeof(sysctl_overcommit_kbytes), + .mode = 0644, + .proc_handler = overcommit_kbytes_handler, + }, + { + .procname = "user_reserve_kbytes", + .data = &sysctl_user_reserve_kbytes, + .maxlen = sizeof(sysctl_user_reserve_kbytes), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "admin_reserve_kbytes", + .data = &sysctl_admin_reserve_kbytes, + .maxlen = sizeof(sysctl_admin_reserve_kbytes), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, +}; + +static int __init init_vm_util_sysctls(void) +{ + register_sysctl_init("vm", util_sysctl_table); + return 0; +} +subsys_initcall(init_vm_util_sysctls); +#endif /* CONFIG_SYSCTL */ + /* * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used */ diff --git a/mm/vmscan.c b/mm/vmscan.c index c767d71c43d7..eb228a8cd769 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -7404,6 +7404,28 @@ void __meminit kswapd_stop(int nid) pgdat_kswapd_unlock(pgdat); } +static const struct ctl_table vmscan_sysctl_table[] = { + { + .procname = "swappiness", + .data = &vm_swappiness, + .maxlen = sizeof(vm_swappiness), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO_HUNDRED, + }, +#ifdef CONFIG_NUMA + { + .procname = "zone_reclaim_mode", + .data = &node_reclaim_mode, + .maxlen = sizeof(node_reclaim_mode), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + } +#endif +}; + static int __init kswapd_init(void) { int nid; @@ -7411,6 +7433,7 @@ static int __init kswapd_init(void) swap_setup(); for_each_node_state(nid, N_MEMORY) kswapd_run(nid); + register_sysctl_init("vm", vmscan_sysctl_table); return 0; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 88998725f1c5..651318765ebf 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -31,8 +31,10 @@ #include "internal.h" +#ifdef CONFIG_PROC_FS #ifdef CONFIG_NUMA -int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; +#define ENABLE_NUMA_STAT 1 +static int sysctl_vm_numa_stat = ENABLE_NUMA_STAT; /* zero numa counters within a zone */ static void zero_zone_numa_counters(struct zone *zone) @@ -74,7 +76,7 @@ static void invalid_numa_statistics(void) static DEFINE_MUTEX(vm_numa_stat_lock); -int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write, +static int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) { int ret, oldval; @@ -102,6 +104,7 @@ out: return ret; } #endif +#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_VM_EVENT_COUNTERS DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; @@ -1940,7 +1943,7 @@ static const struct seq_operations vmstat_op = { #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct delayed_work, vmstat_work); -int sysctl_stat_interval __read_mostly = HZ; +static int sysctl_stat_interval __read_mostly = HZ; static int vmstat_late_init_done; #ifdef CONFIG_PROC_FS @@ -1949,7 +1952,7 @@ static void refresh_vm_stats(struct work_struct *work) refresh_cpu_vm_stats(true); } -int vmstat_refresh(const struct ctl_table *table, int write, +static int vmstat_refresh(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { long val; @@ -2198,6 +2201,38 @@ static int __init vmstat_late_init(void) late_initcall(vmstat_late_init); #endif +#ifdef CONFIG_PROC_FS +static const struct ctl_table vmstat_table[] = { +#ifdef CONFIG_SMP + { + .procname = "stat_interval", + .data = &sysctl_stat_interval, + .maxlen = sizeof(sysctl_stat_interval), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "stat_refresh", + .data = NULL, + .maxlen = 0, + .mode = 0600, + .proc_handler = vmstat_refresh, + }, +#endif +#ifdef CONFIG_NUMA + { + .procname = "numa_stat", + .data = &sysctl_vm_numa_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_vm_numa_stat_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif +}; +#endif + struct workqueue_struct *mm_percpu_wq; void __init init_mm_internals(void) @@ -2229,6 +2264,7 @@ void __init init_mm_internals(void) proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op); proc_create_seq("vmstat", 0444, NULL, &vmstat_op); proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op); + register_sysctl_init("vm", vmstat_table); #endif } |