summary | refs | log | tree | commit | diff
path: root/mm
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2025-03-26 21:02:05 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2025-03-26 21:02:05 -0700
commit: 592329e5e94e26080f4815c6cc6cd0f487a91064 (patch)
tree: 5c2210b93cdfdcec8382aacb0492a5ff827ca930 /mm
parent: 336b4dae6dfecc9aa53a3a68c71b9c1c1d466388 (diff)
parent: 29fa7d7934216e0a93102a930ef28e2a6ae852b1 (diff)
Merge tag 'sysctl-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl
Pull sysctl updates from Joel Granados:

 - Move vm_table members out of kernel/sysctl.c

   All vm_table array members have moved to their respective subsystems
   leading to the removal of vm_table from kernel/sysctl.c. This
   increases modularity by placing the ctl_tables closer to where they
   are actually used and at the same time reducing the chances of merge
   conflicts in kernel/sysctl.c.

 - ctl_table range fixes

   Replace the proc_handler function that checks variable ranges in
   coredump_sysctls and vdso_table with the one that actually uses the
   extra{1,2} pointers as min/max values. This tightens the range of the
   values that users can pass into the kernel effectively preventing
   {under,over}flows.

 - Misc fixes

   Correct grammar errors and typos in test messages. Update sysctl
   files in MAINTAINERS. Constified and removed array size in
   declaration for alignment_tbl

* tag 'sysctl-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl: (22 commits)
  selftests/sysctl: fix wording of help messages
  selftests: fix spelling/grammar errors in sysctl/sysctl.sh
  MAINTAINERS: Update sysctl file list in MAINTAINERS
  sysctl: Fix underflow value setting risk in vm_table
  coredump: Fixes core_pipe_limit sysctl proc_handler
  sysctl: remove unneeded include
  sysctl: remove the vm_table
  sh: vdso: move the sysctl to arch/sh/kernel/vsyscall/vsyscall.c
  x86: vdso: move the sysctl to arch/x86/entry/vdso/vdso32-setup.c
  fs: dcache: move the sysctl to fs/dcache.c
  sunrpc: simplify rpcauth_cache_shrink_count()
  fs: drop_caches: move sysctl to fs/drop_caches.c
  fs: fs-writeback: move sysctl to fs/fs-writeback.c
  mm: nommu: move sysctl to mm/nommu.c
  security: min_addr: move sysctl to security/min_addr.c
  mm: mmap: move sysctl to mm/mmap.c
  mm: util: move sysctls to mm/util.c
  mm: vmscan: move vmscan sysctls to mm/vmscan.c
  mm: swap: move sysctl to mm/swap.c
  mm: filemap: move sysctl to mm/filemap.c
  ...
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c   | 18
-rw-r--r--  mm/internal.h  | 10
-rw-r--r--  mm/mmap.c      | 54
-rw-r--r--  mm/nommu.c     | 15
-rw-r--r--  mm/swap.c      | 16
-rw-r--r--  mm/swap.h      |  1
-rw-r--r--  mm/util.c      | 67
-rw-r--r--  mm/vmscan.c    | 23
-rw-r--r--  mm/vmstat.c    | 44
9 files changed, 231 insertions, 17 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index e9404290f2c6..27dbfc1193b4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -47,6 +47,7 @@
#include <linux/splice.h>
#include <linux/rcupdate_wait.h>
#include <linux/sched/mm.h>
+#include <linux/sysctl.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -1077,6 +1078,19 @@ static wait_queue_head_t *folio_waitqueue(struct folio *folio)
return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)];
}
+/* How many times do we accept lock stealing from under a waiter? */
+static int sysctl_page_lock_unfairness = 5;
+static const struct ctl_table filemap_sysctl_table[] = {
+ {
+ .procname = "page_lock_unfairness",
+ .data = &sysctl_page_lock_unfairness,
+ .maxlen = sizeof(sysctl_page_lock_unfairness),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ }
+};
+
void __init pagecache_init(void)
{
int i;
@@ -1085,6 +1099,7 @@ void __init pagecache_init(void)
init_waitqueue_head(&folio_wait_table[i]);
page_writeback_init();
+ register_sysctl_init("vm", filemap_sysctl_table);
}
/*
@@ -1232,9 +1247,6 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
return true;
}
-/* How many times do we accept lock stealing from under a waiter? */
-int sysctl_page_lock_unfairness = 5;
-
static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
int state, enum behavior behavior)
{
diff --git a/mm/internal.h b/mm/internal.h
index 20b3535935a3..7eb27ab83ed7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1097,9 +1097,13 @@ static inline void mminit_verify_zonelist(void)
#define NODE_RECLAIM_SUCCESS 1
#ifdef CONFIG_NUMA
+extern int node_reclaim_mode;
+
extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
extern int find_next_best_node(int node, nodemask_t *used_node_mask);
#else
+#define node_reclaim_mode 0
+
static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
unsigned int order)
{
@@ -1111,6 +1115,12 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
}
#endif
+static inline bool node_reclaim_enabled(void)
+{
+ /* Is any node_reclaim_mode bit set? */
+ return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP);
+}
+
/*
* mm/memory-failure.c
*/
diff --git a/mm/mmap.c b/mm/mmap.c
index cda01071c7b1..d6bbe435bd99 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1543,6 +1543,57 @@ struct vm_area_struct *_install_special_mapping(
&special_mapping_vmops);
}
+#ifdef CONFIG_SYSCTL
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+ defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
+int sysctl_legacy_va_layout;
+#endif
+
+static const struct ctl_table mmap_table[] = {
+ {
+ .procname = "max_map_count",
+ .data = &sysctl_max_map_count,
+ .maxlen = sizeof(sysctl_max_map_count),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+ defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
+ {
+ .procname = "legacy_va_layout",
+ .data = &sysctl_legacy_va_layout,
+ .maxlen = sizeof(sysctl_legacy_va_layout),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+ {
+ .procname = "mmap_rnd_bits",
+ .data = &mmap_rnd_bits,
+ .maxlen = sizeof(mmap_rnd_bits),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&mmap_rnd_bits_min,
+ .extra2 = (void *)&mmap_rnd_bits_max,
+ },
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+ {
+ .procname = "mmap_rnd_compat_bits",
+ .data = &mmap_rnd_compat_bits,
+ .maxlen = sizeof(mmap_rnd_compat_bits),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&mmap_rnd_compat_bits_min,
+ .extra2 = (void *)&mmap_rnd_compat_bits_max,
+ },
+#endif
+};
+#endif /* CONFIG_SYSCTL */
+
/*
* initialise the percpu counter for VM
*/
@@ -1552,6 +1603,9 @@ void __init mmap_init(void)
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
+#ifdef CONFIG_SYSCTL
+ register_sysctl_init("vm", mmap_table);
+#endif
}
/*
diff --git a/mm/nommu.c b/mm/nommu.c
index 9cb6e99215e2..acc18ce611ed 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -48,7 +48,6 @@ struct page *mem_map;
unsigned long max_mapnr;
EXPORT_SYMBOL(max_mapnr);
unsigned long highest_memmap_pfn;
-int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
int heap_stack_gap = 0;
atomic_long_t mmap_pages_allocated;
@@ -392,6 +391,19 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
return mm->brk = brk;
}
+static int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
+
+static const struct ctl_table nommu_table[] = {
+ {
+ .procname = "nr_trim_pages",
+ .data = &sysctl_nr_trim_pages,
+ .maxlen = sizeof(sysctl_nr_trim_pages),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+};
+
/*
* initialise the percpu counter for VM and region record slabs
*/
@@ -402,6 +414,7 @@ void __init mmap_init(void)
ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
VM_BUG_ON(ret);
vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC|SLAB_ACCOUNT);
+ register_sysctl_init("vm", nommu_table);
}
/*
diff --git a/mm/swap.c b/mm/swap.c
index fc8281ef4241..b81cce146eb2 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -45,7 +45,7 @@
/* How many pages do we try to swap or page in/out together? As a power of 2 */
int page_cluster;
-const int page_cluster_max = 31;
+static const int page_cluster_max = 31;
struct cpu_fbatches {
/*
@@ -1076,6 +1076,18 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
fbatch->nr = j;
}
+static const struct ctl_table swap_sysctl_table[] = {
+ {
+ .procname = "page-cluster",
+ .data = &page_cluster,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = (void *)&page_cluster_max,
+ }
+};
+
/*
* Perform any setup for the swap system
*/
@@ -1092,4 +1104,6 @@ void __init swap_setup(void)
* Right now other parts of the system means that we
* _really_ don't want to cluster much more
*/
+
+ register_sysctl_init("vm", swap_sysctl_table);
}
diff --git a/mm/swap.h b/mm/swap.h
index ad2f121de970..274dcc6219a0 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -3,6 +3,7 @@
#define _MM_SWAP_H
struct mempolicy;
+extern int page_cluster;
#ifdef CONFIG_SWAP
#include <linux/swapops.h> /* for swp_offset */
diff --git a/mm/util.c b/mm/util.c
index e7d81371032b..448117da071f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -12,6 +12,7 @@
#include <linux/security.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/sysctl.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
@@ -747,14 +748,16 @@ int folio_mc_copy(struct folio *dst, struct folio *src)
EXPORT_SYMBOL(folio_mc_copy);
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
-int sysctl_overcommit_ratio __read_mostly = 50;
-unsigned long sysctl_overcommit_kbytes __read_mostly;
+static int sysctl_overcommit_ratio __read_mostly = 50;
+static unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
-int overcommit_ratio_handler(const struct ctl_table *table, int write, void *buffer,
- size_t *lenp, loff_t *ppos)
+#ifdef CONFIG_SYSCTL
+
+static int overcommit_ratio_handler(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -769,8 +772,8 @@ static void sync_overcommit_as(struct work_struct *dummy)
percpu_counter_sync(&vm_committed_as);
}
-int overcommit_policy_handler(const struct ctl_table *table, int write, void *buffer,
- size_t *lenp, loff_t *ppos)
+static int overcommit_policy_handler(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
struct ctl_table t;
int new_policy = -1;
@@ -805,8 +808,8 @@ int overcommit_policy_handler(const struct ctl_table *table, int write, void *bu
return ret;
}
-int overcommit_kbytes_handler(const struct ctl_table *table, int write, void *buffer,
- size_t *lenp, loff_t *ppos)
+static int overcommit_kbytes_handler(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
@@ -816,6 +819,54 @@ int overcommit_kbytes_handler(const struct ctl_table *table, int write, void *bu
return ret;
}
+static const struct ctl_table util_sysctl_table[] = {
+ {
+ .procname = "overcommit_memory",
+ .data = &sysctl_overcommit_memory,
+ .maxlen = sizeof(sysctl_overcommit_memory),
+ .mode = 0644,
+ .proc_handler = overcommit_policy_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
+ },
+ {
+ .procname = "overcommit_ratio",
+ .data = &sysctl_overcommit_ratio,
+ .maxlen = sizeof(sysctl_overcommit_ratio),
+ .mode = 0644,
+ .proc_handler = overcommit_ratio_handler,
+ },
+ {
+ .procname = "overcommit_kbytes",
+ .data = &sysctl_overcommit_kbytes,
+ .maxlen = sizeof(sysctl_overcommit_kbytes),
+ .mode = 0644,
+ .proc_handler = overcommit_kbytes_handler,
+ },
+ {
+ .procname = "user_reserve_kbytes",
+ .data = &sysctl_user_reserve_kbytes,
+ .maxlen = sizeof(sysctl_user_reserve_kbytes),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "admin_reserve_kbytes",
+ .data = &sysctl_admin_reserve_kbytes,
+ .maxlen = sizeof(sysctl_admin_reserve_kbytes),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+};
+
+static int __init init_vm_util_sysctls(void)
+{
+ register_sysctl_init("vm", util_sysctl_table);
+ return 0;
+}
+subsys_initcall(init_vm_util_sysctls);
+#endif /* CONFIG_SYSCTL */
+
/*
* Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
*/
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c767d71c43d7..eb228a8cd769 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7404,6 +7404,28 @@ void __meminit kswapd_stop(int nid)
pgdat_kswapd_unlock(pgdat);
}
+static const struct ctl_table vmscan_sysctl_table[] = {
+ {
+ .procname = "swappiness",
+ .data = &vm_swappiness,
+ .maxlen = sizeof(vm_swappiness),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO_HUNDRED,
+ },
+#ifdef CONFIG_NUMA
+ {
+ .procname = "zone_reclaim_mode",
+ .data = &node_reclaim_mode,
+ .maxlen = sizeof(node_reclaim_mode),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ }
+#endif
+};
+
static int __init kswapd_init(void)
{
int nid;
@@ -7411,6 +7433,7 @@ static int __init kswapd_init(void)
swap_setup();
for_each_node_state(nid, N_MEMORY)
kswapd_run(nid);
+ register_sysctl_init("vm", vmscan_sysctl_table);
return 0;
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 88998725f1c5..651318765ebf 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -31,8 +31,10 @@
#include "internal.h"
+#ifdef CONFIG_PROC_FS
#ifdef CONFIG_NUMA
-int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
+#define ENABLE_NUMA_STAT 1
+static int sysctl_vm_numa_stat = ENABLE_NUMA_STAT;
/* zero numa counters within a zone */
static void zero_zone_numa_counters(struct zone *zone)
@@ -74,7 +76,7 @@ static void invalid_numa_statistics(void)
static DEFINE_MUTEX(vm_numa_stat_lock);
-int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
+static int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int ret, oldval;
@@ -102,6 +104,7 @@ out:
return ret;
}
#endif
+#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
@@ -1940,7 +1943,7 @@ static const struct seq_operations vmstat_op = {
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
-int sysctl_stat_interval __read_mostly = HZ;
+static int sysctl_stat_interval __read_mostly = HZ;
static int vmstat_late_init_done;
#ifdef CONFIG_PROC_FS
@@ -1949,7 +1952,7 @@ static void refresh_vm_stats(struct work_struct *work)
refresh_cpu_vm_stats(true);
}
-int vmstat_refresh(const struct ctl_table *table, int write,
+static int vmstat_refresh(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
long val;
@@ -2198,6 +2201,38 @@ static int __init vmstat_late_init(void)
late_initcall(vmstat_late_init);
#endif
+#ifdef CONFIG_PROC_FS
+static const struct ctl_table vmstat_table[] = {
+#ifdef CONFIG_SMP
+ {
+ .procname = "stat_interval",
+ .data = &sysctl_stat_interval,
+ .maxlen = sizeof(sysctl_stat_interval),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "stat_refresh",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0600,
+ .proc_handler = vmstat_refresh,
+ },
+#endif
+#ifdef CONFIG_NUMA
+ {
+ .procname = "numa_stat",
+ .data = &sysctl_vm_numa_stat,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sysctl_vm_numa_stat_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
+};
+#endif
+
struct workqueue_struct *mm_percpu_wq;
void __init init_mm_internals(void)
@@ -2229,6 +2264,7 @@ void __init init_mm_internals(void)
proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
+ register_sysctl_init("vm", vmstat_table);
#endif
}