diff options
37 files changed, 238 insertions, 118 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index cfb0caba3c2d..9344c33c52e1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16127,6 +16127,7 @@ M: Andrew Morton <akpm@linux-foundation.org> M: Mike Rapoport <rppt@kernel.org> L: linux-mm@kvack.org S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git F: include/linux/numa_memblks.h F: mm/numa.c F: mm/numa_emulation.c @@ -16194,6 +16195,7 @@ R: Rik van Riel <riel@surriel.com> R: Liam R. Howlett <Liam.Howlett@oracle.com> R: Vlastimil Babka <vbabka@suse.cz> R: Harry Yoo <harry.yoo@oracle.com> +R: Jann Horn <jannh@google.com> L: linux-mm@kvack.org S: Maintained F: include/linux/rmap.h @@ -16239,6 +16241,7 @@ R: Nico Pache <npache@redhat.com> R: Ryan Roberts <ryan.roberts@arm.com> R: Dev Jain <dev.jain@arm.com> R: Barry Song <baohua@kernel.org> +R: Lance Yang <lance.yang@linux.dev> L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index af1ca875c52c..410060ebd86d 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -94,7 +94,7 @@ int load_other_segments(struct kimage *image, char *initrd, unsigned long initrd_len, char *cmdline) { - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; void *dtb = NULL; unsigned long initrd_load_addr = 0, dtb_len, orig_segments = image->nr_segments; diff --git a/arch/riscv/kernel/kexec_elf.c b/arch/riscv/kernel/kexec_elf.c index 56444c7bd34e..531d348db84d 100644 --- a/arch/riscv/kernel/kexec_elf.c +++ b/arch/riscv/kernel/kexec_elf.c @@ -28,7 +28,7 @@ static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, int i; int ret = 0; size_t size; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; const struct elf_phdr *phdr; kbuf.image = image; @@ -66,7 +66,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, { int i; int ret; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; const struct elf_phdr *phdr; unsigned long lowest_paddr = ULONG_MAX; unsigned long lowest_vaddr = ULONG_MAX; diff --git a/arch/riscv/kernel/kexec_image.c b/arch/riscv/kernel/kexec_image.c index 26a81774a78a..8f2eb900910b 100644 --- a/arch/riscv/kernel/kexec_image.c +++ b/arch/riscv/kernel/kexec_image.c @@ -41,7 +41,7 @@ static void *image_load(struct kimage *image, struct riscv_image_header *h; u64 flags; bool be_image, be_kernel; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; int ret; /* Check Image header */ diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c index e36104af2e24..b9eb41b0a975 100644 --- a/arch/riscv/kernel/machine_kexec_file.c +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -261,7 +261,7 @@ int load_extra_segments(struct kimage *image, unsigned long kernel_start, int ret; void *fdt; unsigned long initrd_pbase = 0UL; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; char *modified_cmdline = NULL; kbuf.image = image; diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 4d364de43799..143e34a4eca5 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -16,7 +16,7 @@ static int kexec_file_add_kernel_elf(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; const Elf_Ehdr *ehdr; const Elf_Phdr *phdr; Elf_Addr entry; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index a32ce8bea745..9a439175723c 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -16,7 +16,7 @@ static int kexec_file_add_kernel_image(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; buf.image = image; diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index c2bac14dd668..a36d7311c668 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -129,7 +129,7 @@ static int kexec_file_update_purgatory(struct kimage *image, static int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; int ret; buf.image = image; @@ -152,7 +152,7 @@ static int kexec_file_add_purgatory(struct kimage *image, static int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data) { - struct kexec_buf buf; + struct kexec_buf buf = {}; int ret; buf.image = image; @@ -184,7 +184,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, { __u32 *lc_ipl_parmblock_ptr; unsigned int len, ncerts; - struct kexec_buf buf; + struct kexec_buf buf = {}; unsigned long addr; void *ptr, *end; int ret; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9ac271b82780..78b56cd7698e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1788,6 +1788,7 @@ static int write_same_filled_page(struct zram *zram, unsigned long fill, u32 index) { zram_slot_lock(zram, index); + zram_free_page(zram, index); zram_set_flag(zram, index, ZRAM_SAME); zram_set_handle(zram, index, fill); zram_slot_unlock(zram, index); @@ -1825,6 +1826,7 @@ static int write_incompressible_page(struct zram *zram, struct page *page, kunmap_local(src); zram_slot_lock(zram, index); + zram_free_page(zram, index); zram_set_flag(zram, index, ZRAM_HUGE); zram_set_handle(zram, index, handle); zram_set_obj_size(zram, index, PAGE_SIZE); @@ -1848,11 +1850,6 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) unsigned long element; bool same_filled; - /* First, free memory allocated to this slot (if any) */ - zram_slot_lock(zram, index); - zram_free_page(zram, index); - zram_slot_unlock(zram, index); - mem = kmap_local_page(page); same_filled = page_same_filled(mem, &element); kunmap_local(mem); @@ -1894,6 +1891,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index) zcomp_stream_put(zstrm); zram_slot_lock(zram, index); + zram_free_page(zram, index); zram_set_handle(zram, index, handle); zram_set_obj_size(zram, index, comp_len); zram_slot_unlock(zram, index); diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 14868a3dd592..bc52afbfc5c7 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -1075,7 +1075,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs) ************************************************************************/ static ssize_t nilfs_feature_revision_show(struct kobject *kobj, - struct attribute *attr, char *buf) + struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%d.%d\n", NILFS_CURRENT_REV, NILFS_MINOR_REV); @@ -1087,7 +1087,7 @@ static const char features_readme_str[] = "(1) revision\n\tshow current revision of NILFS file system driver.\n"; static ssize_t nilfs_feature_README_show(struct kobject *kobj, - struct attribute *attr, + struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, features_readme_str); diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h index 78a87a016928..d370cd5cce3f 100644 --- a/fs/nilfs2/sysfs.h +++ b/fs/nilfs2/sysfs.h @@ -50,16 +50,16 @@ struct nilfs_sysfs_dev_subgroups { struct completion sg_segments_kobj_unregister; }; -#define NILFS_COMMON_ATTR_STRUCT(name) \ +#define NILFS_KOBJ_ATTR_STRUCT(name) \ struct nilfs_##name##_attr { \ struct attribute attr; \ - ssize_t (*show)(struct kobject *, struct attribute *, \ + ssize_t (*show)(struct kobject *, struct kobj_attribute *, \ char *); \ - ssize_t (*store)(struct kobject *, struct attribute *, \ + ssize_t (*store)(struct kobject *, struct kobj_attribute *, \ const char *, size_t); \ } -NILFS_COMMON_ATTR_STRUCT(feature); +NILFS_KOBJ_ATTR_STRUCT(feature); #define NILFS_DEV_ATTR_STRUCT(name) \ struct nilfs_##name##_attr { \ diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 930150ed5db1..ef147e8b3271 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -706,6 +706,8 @@ out: * it not only handles the fiemap for inlined files, but also deals * with the fast symlink, cause they have no difference for extent * mapping per se. + * + * Must be called with ip_alloc_sem semaphore held. */ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, struct fiemap_extent_info *fieinfo, @@ -717,6 +719,7 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, u64 phys; u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; struct ocfs2_inode_info *oi = OCFS2_I(inode); + lockdep_assert_held_read(&oi->ip_alloc_sem); di = (struct ocfs2_dinode *)di_bh->b_data; if (ocfs2_inode_is_fast_symlink(inode)) @@ -732,8 +735,11 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); + /* Release the ip_alloc_sem to prevent deadlock on page fault */ + up_read(&OCFS2_I(inode)->ip_alloc_sem); ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, flags); + down_read(&OCFS2_I(inode)->ip_alloc_sem); if (ret < 0) return ret; } @@ -802,9 +808,11 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; - + /* Release the ip_alloc_sem to prevent deadlock on page fault */ + up_read(&OCFS2_I(inode)->ip_alloc_sem); ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, len_bytes, fe_flags); + down_read(&OCFS2_I(inode)->ip_alloc_sem); if (ret) break; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index bd0c099cfdd2..176281112273 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -393,7 +393,8 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, if (proc_alloc_inum(&dp->low_ino)) goto out_free_entry; - pde_set_flags(dp); + if (!S_ISDIR(dp->mode)) + pde_set_flags(dp); write_lock(&proc_subdir_lock); dp->parent = dir; diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index fa4ffe037bc7..8720a0705900 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -18,23 +18,42 @@ #define KASAN_ABI_VERSION 5 /* + * Clang 22 added preprocessor macros to match GCC, in hopes of eventually + * dropping __has_feature support for sanitizers: + * https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c + * Create these macros for older versions of clang so that it is easy to clean + * up once the minimum supported version of LLVM for building the kernel always + * creates these macros. + * * Note: Checking __has_feature(*_sanitizer) is only true if the feature is * enabled. Therefore it is not required to additionally check defined(CONFIG_*) * to avoid adding redundant attributes in other configurations. */ +#if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__) +#define __SANITIZE_ADDRESS__ +#endif +#if __has_feature(hwaddress_sanitizer) && !defined(__SANITIZE_HWADDRESS__) +#define __SANITIZE_HWADDRESS__ +#endif +#if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__) +#define __SANITIZE_THREAD__ +#endif -#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) -/* Emulate GCC's __SANITIZE_ADDRESS__ flag */ +/* + * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel. + */ +#ifdef __SANITIZE_HWADDRESS__ #define __SANITIZE_ADDRESS__ +#endif + +#ifdef __SANITIZE_ADDRESS__ #define __no_sanitize_address \ __attribute__((no_sanitize("address", "hwaddress"))) #else #define __no_sanitize_address #endif -#if __has_feature(thread_sanitizer) -/* emulate gcc's __SANITIZE_THREAD__ flag */ -#define __SANITIZE_THREAD__ +#ifdef __SANITIZE_THREAD__ #define __no_sanitize_thread \ __attribute__((no_sanitize("thread"))) #else diff --git a/include/linux/damon.h b/include/linux/damon.h index ec8716292c09..aa7381be388c 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -636,6 +636,7 @@ struct damon_operations { * @data: Data that will be passed to @fn. * @repeat: Repeat invocations. * @return_code: Return code from @fn invocation. + * @dealloc_on_cancel: De-allocate when canceled. * * Control damon_call(), which requests specific kdamond to invoke a given * function. Refer to damon_call() for more details. @@ -645,6 +646,7 @@ struct damon_call_control { void *data; bool repeat; int return_code; + bool dealloc_on_cancel; /* private: internal use only */ /* informs if the kdamond finished handling of the request */ struct completion completion; diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 890011071f2b..fe5ce9215821 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -562,7 +562,7 @@ static inline void kasan_init_hw_tags(void) { } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); -int kasan_populate_vmalloc(unsigned long addr, unsigned long size); +int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end, @@ -574,7 +574,7 @@ static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, - unsigned long size) + unsigned long size, gfp_t gfp_mask) { return 0; } @@ -610,7 +610,7 @@ static __always_inline void kasan_poison_vmalloc(const void *start, static inline void kasan_populate_early_vm_area_shadow(void *start, unsigned long size) { } static inline int kasan_populate_vmalloc(unsigned long start, - unsigned long size) + unsigned long size, gfp_t gfp_mask) { return 0; } diff --git a/include/linux/swap.h b/include/linux/swap.h index c2da85cb7fe7..a2bb20841616 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -381,6 +381,16 @@ void folio_add_lru_vma(struct folio *, struct vm_area_struct *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); +static inline bool folio_may_be_lru_cached(struct folio *folio) +{ + /* + * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting. + * Holding small numbers of low-order mTHP folios in per-CPU LRU cache + * will be sensible, but nobody has implemented and tested that yet. + */ + return !folio_test_large(folio); +} + extern atomic_t lru_disable_count; static inline bool lru_cache_disabled(void) diff --git a/init/main.c b/init/main.c index 0ee0ee7b7c2c..5753e9539ae6 100644 --- a/init/main.c +++ b/init/main.c @@ -956,6 +956,7 @@ void start_kernel(void) sort_main_extable(); trap_init(); mm_core_init(); + maple_tree_init(); poking_init(); ftrace_init(); @@ -973,7 +974,6 @@ void start_kernel(void) "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); radix_tree_init(); - maple_tree_init(); /* * Set up housekeeping before setting up workqueues to allow the unbound diff --git a/mm/damon/core.c b/mm/damon/core.c index 12f136a121a2..be5942435d78 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2141,6 +2141,10 @@ static void damos_adjust_quota(struct damon_ctx *c, struct damos *s) if (!quota->ms && !quota->sz && list_empty("a->goals)) return; + /* First charge window */ + if (!quota->total_charged_sz && !quota->charged_from) + quota->charged_from = jiffies; + /* New charge window starts */ if (time_after_eq(jiffies, quota->charged_from + msecs_to_jiffies(quota->reset_interval))) { @@ -2506,10 +2510,14 @@ static void kdamond_call(struct damon_ctx *ctx, bool cancel) mutex_lock(&ctx->call_controls_lock); list_del(&control->list); mutex_unlock(&ctx->call_controls_lock); - if (!control->repeat) + if (!control->repeat) { complete(&control->completion); - else + } else if (control->canceled && control->dealloc_on_cancel) { + kfree(control); + continue; + } else { list_add(&control->list, &repeat_controls); + } } control = list_first_entry_or_null(&repeat_controls, struct damon_call_control, list); diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c index 151a9de5ad8b..b5a5ed16a7a5 100644 --- a/mm/damon/lru_sort.c +++ b/mm/damon/lru_sort.c @@ -198,6 +198,11 @@ static int damon_lru_sort_apply_parameters(void) if (err) return err; + if (!damon_lru_sort_mon_attrs.sample_interval) { + err = -EINVAL; + goto out; + } + err = damon_set_attrs(ctx, &damon_lru_sort_mon_attrs); if (err) goto out; diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 3c71b4596676..fb7c982a0018 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -194,6 +194,11 @@ static int damon_reclaim_apply_parameters(void) if (err) return err; + if (!damon_reclaim_mon_attrs.aggr_interval) { + err = -EINVAL; + goto out; + } + err = damon_set_attrs(param_ctx, &damon_reclaim_mon_attrs); if (err) goto out; diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 0ed404c89f80..fe4e73d0ebbb 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1292,14 +1292,18 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, { struct damon_sysfs_kdamond *kdamond = container_of(kobj, struct damon_sysfs_kdamond, kobj); - struct damon_ctx *ctx = kdamond->damon_ctx; - bool running; + struct damon_ctx *ctx; + bool running = false; - if (!ctx) - running = false; - else + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + + ctx = kdamond->damon_ctx; + if (ctx) running = damon_is_running(ctx); + mutex_unlock(&damon_sysfs_lock); + return sysfs_emit(buf, "%s\n", running ? damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] : damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_OFF]); @@ -1565,14 +1569,10 @@ static int damon_sysfs_repeat_call_fn(void *data) return 0; } -static struct damon_call_control damon_sysfs_repeat_call_control = { - .fn = damon_sysfs_repeat_call_fn, - .repeat = true, -}; - static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) { struct damon_ctx *ctx; + struct damon_call_control *repeat_call_control; int err; if (damon_sysfs_kdamond_running(kdamond)) @@ -1585,18 +1585,29 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) damon_destroy_ctx(kdamond->damon_ctx); kdamond->damon_ctx = NULL; + repeat_call_control = kmalloc(sizeof(*repeat_call_control), + GFP_KERNEL); + if (!repeat_call_control) + return -ENOMEM; + ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]); - if (IS_ERR(ctx)) + if (IS_ERR(ctx)) { + kfree(repeat_call_control); return PTR_ERR(ctx); + } err = damon_start(&ctx, 1, false); if (err) { + kfree(repeat_call_control); damon_destroy_ctx(ctx); return err; } kdamond->damon_ctx = ctx; - damon_sysfs_repeat_call_control.data = kdamond; - damon_call(ctx, &damon_sysfs_repeat_call_control); + repeat_call_control->fn = damon_sysfs_repeat_call_fn; + repeat_call_control->data = kdamond; + repeat_call_control->repeat = true; + repeat_call_control->dealloc_on_cancel = true; + damon_call(ctx, repeat_call_control); return err; } @@ -2287,8 +2287,8 @@ static unsigned long collect_longterm_unpinnable_folios( struct pages_or_folios *pofs) { unsigned long collected = 0; - bool drain_allow = true; struct folio *folio; + int drained = 0; long i = 0; for (folio = pofs_get_folio(pofs, i); folio; @@ -2307,9 +2307,17 @@ static unsigned long collect_longterm_unpinnable_folios( continue; } - if (!folio_test_lru(folio) && drain_allow) { + if (drained == 0 && folio_may_be_lru_cached(folio) && + folio_ref_count(folio) != + folio_expected_ref_count(folio) + 1) { + lru_add_drain(); + drained = 1; + } + if (drained == 1 && folio_may_be_lru_cached(folio) && + folio_ref_count(folio) != + folio_expected_ref_count(folio) + 1) { lru_add_drain_all(); - drain_allow = false; + drained = 2; } if (!folio_isolate_lru(folio)) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2c06557ee95d..1e777cc51ad0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5854,7 +5854,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, spinlock_t *ptl; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); - bool adjust_reservation = false; + bool adjust_reservation; unsigned long last_addr_mask; bool force_flush = false; @@ -5947,6 +5947,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, sz); hugetlb_count_sub(pages_per_huge_page(h), mm); hugetlb_remove_rmap(folio); + spin_unlock(ptl); /* * Restore the reservation for anonymous page, otherwise the @@ -5954,14 +5955,16 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, * If there we are freeing a surplus, do not set the restore * reservation bit. */ + adjust_reservation = false; + + spin_lock_irq(&hugetlb_lock); if (!h->surplus_huge_pages && __vma_private_lock(vma) && folio_test_anon(folio)) { folio_set_hugetlb_restore_reserve(folio); /* Reservation to be adjusted after the spin lock */ adjust_reservation = true; } - - spin_unlock(ptl); + spin_unlock_irq(&hugetlb_lock); /* * Adjust the reservation for the region that will have the diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index e2ceebf737ef..11d472a5c4e8 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -336,13 +336,13 @@ static void ___free_pages_bulk(struct page **pages, int nr_pages) } } -static int ___alloc_pages_bulk(struct page **pages, int nr_pages) +static int ___alloc_pages_bulk(struct page **pages, int nr_pages, gfp_t gfp_mask) { unsigned long nr_populated, nr_total = nr_pages; struct page **page_array = pages; while (nr_pages) { - nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); + nr_populated = alloc_pages_bulk(gfp_mask, nr_pages, pages); if (!nr_populated) { ___free_pages_bulk(page_array, nr_total - nr_pages); return -ENOMEM; @@ -354,25 +354,42 @@ static int ___alloc_pages_bulk(struct page **pages, int nr_pages) return 0; } -static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) +static int __kasan_populate_vmalloc(unsigned long start, unsigned long end, gfp_t gfp_mask) { unsigned long nr_pages, nr_total = PFN_UP(end - start); struct vmalloc_populate_data data; + unsigned int flags; int ret = 0; - data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO); + data.pages = (struct page **)__get_free_page(gfp_mask | __GFP_ZERO); if (!data.pages) return -ENOMEM; while (nr_total) { nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0])); - ret = ___alloc_pages_bulk(data.pages, nr_pages); + ret = ___alloc_pages_bulk(data.pages, nr_pages, gfp_mask); if (ret) break; data.start = start; + + /* + * page tables allocations ignore external gfp mask, enforce it + * by the scope API + */ + if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) + flags = memalloc_nofs_save(); + else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) + flags = memalloc_noio_save(); + ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE, kasan_populate_vmalloc_pte, &data); + + if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) + memalloc_nofs_restore(flags); + else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) + memalloc_noio_restore(flags); + ___free_pages_bulk(data.pages, nr_pages); if (ret) break; @@ -386,7 +403,7 @@ static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) return ret; } -int kasan_populate_vmalloc(unsigned long addr, unsigned long size) +int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask) { unsigned long shadow_start, shadow_end; int ret; @@ -415,7 +432,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) shadow_start = PAGE_ALIGN_DOWN(shadow_start); shadow_end = PAGE_ALIGN(shadow_end); - ret = __kasan_populate_vmalloc(shadow_start, shadow_end); + ret = __kasan_populate_vmalloc(shadow_start, shadow_end, gfp_mask); if (ret) return ret; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 24e18a7f8a93..4ec324a4c1fe 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1417,8 +1417,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, */ if (cc->is_khugepaged && (pte_young(pteval) || folio_test_young(folio) || - folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm, - address))) + folio_test_referenced(folio) || + mmu_notifier_test_young(vma->vm_mm, _address))) referenced++; } if (!writable) { diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c15ffee7d32b..b93ab99ad3ef 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -956,7 +956,7 @@ static const char * const action_page_types[] = { [MF_MSG_BUDDY] = "free buddy page", [MF_MSG_DAX] = "dax page", [MF_MSG_UNSPLIT_THP] = "unsplit thp", - [MF_MSG_ALREADY_POISONED] = "already poisoned", + [MF_MSG_ALREADY_POISONED] = "already poisoned page", [MF_MSG_UNKNOWN] = "unknown page", }; @@ -1349,9 +1349,10 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type, { trace_memory_failure_event(pfn, type, result); - num_poisoned_pages_inc(pfn); - - update_per_node_mf_stats(pfn, result); + if (type != MF_MSG_ALREADY_POISONED) { + num_poisoned_pages_inc(pfn); + update_per_node_mf_stats(pfn, result); + } pr_err("%#lx: recovery action for %s: %s\n", pfn, action_page_types[type], action_name[result]); @@ -2094,12 +2095,11 @@ retry: *hugetlb = 0; return 0; } else if (res == -EHWPOISON) { - pr_err("%#lx: already hardware poisoned\n", pfn); if (flags & MF_ACTION_REQUIRED) { folio = page_folio(p); res = kill_accessing_process(current, folio_pfn(folio), flags); - action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); } + action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); return res; } else if (res == -EBUSY) { if (!(flags & MF_NO_RETRY)) { @@ -2285,7 +2285,6 @@ try_again: goto unlock_mutex; if (TestSetPageHWPoison(p)) { - pr_err("%#lx: already hardware poisoned\n", pfn); res = -EHWPOISON; if (flags & MF_ACTION_REQUIRED) res = kill_accessing_process(current, pfn, flags); @@ -2569,10 +2568,9 @@ int unpoison_memory(unsigned long pfn) static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - if (!pfn_valid(pfn)) - return -ENXIO; - - p = pfn_to_page(pfn); + p = pfn_to_online_page(pfn); + if (!p) + return -EIO; folio = page_folio(p); mutex_lock(&mf_mutex); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1f15af712bc3..74318c787715 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1815,8 +1815,14 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; if (folio_contain_hwpoisoned_page(folio)) { - if (WARN_ON(folio_test_lru(folio))) - folio_isolate_lru(folio); + /* + * unmap_poisoned_folio() cannot handle large folios + * in all cases yet. + */ + if (folio_test_large(folio) && !folio_test_hugetlb(folio)) + goto put_folio; + if (folio_test_lru(folio) && !folio_isolate_lru(folio)) + goto put_folio; if (folio_mapped(folio)) { folio_lock(folio); unmap_poisoned_folio(folio, pfn, false); diff --git a/mm/mlock.c b/mm/mlock.c index a1d93ad33c6d..bb0776f5ef7c 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -255,7 +255,7 @@ void mlock_folio(struct folio *folio) folio_get(folio); if (!folio_batch_add(fbatch, mlock_lru(folio)) || - folio_test_large(folio) || lru_cache_disabled()) + !folio_may_be_lru_cached(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } @@ -278,7 +278,7 @@ void mlock_new_folio(struct folio *folio) folio_get(folio); if (!folio_batch_add(fbatch, mlock_new(folio)) || - folio_test_large(folio) || lru_cache_disabled()) + !folio_may_be_lru_cached(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } @@ -299,7 +299,7 @@ void munlock_folio(struct folio *folio) */ folio_get(folio); if (!folio_batch_add(fbatch, folio) || - folio_test_large(folio) || lru_cache_disabled()) + !folio_may_be_lru_cached(folio) || lru_cache_disabled()) mlock_folio_batch(fbatch); local_unlock(&mlock_fbatch.lock); } diff --git a/mm/mremap.c b/mm/mremap.c index e618a706aff5..35de0a7b910e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1774,15 +1774,18 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm) if (!vrm->new_len) return -EINVAL; - /* Is the new length or address silly? */ - if (vrm->new_len > TASK_SIZE || - vrm->new_addr > TASK_SIZE - vrm->new_len) + /* Is the new length silly? */ + if (vrm->new_len > TASK_SIZE) return -EINVAL; /* Remainder of checks are for cases with specific new_addr. */ if (!vrm_implies_new_addr(vrm)) return 0; + /* Is the new address silly? */ + if (vrm->new_addr > TASK_SIZE - vrm->new_len) + return -EINVAL; + /* The new address must be page-aligned. */ if (offset_in_page(vrm->new_addr)) return -EINVAL; diff --git a/mm/percpu.c b/mm/percpu.c index a56f35dcc417..81462ce5866e 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1734,7 +1734,7 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved, bool is_atomic; bool do_warn; struct obj_cgroup *objcg = NULL; - static int warn_limit = 10; + static atomic_t warn_limit = ATOMIC_INIT(10); struct pcpu_chunk *chunk, *next; const char *err; int slot, off, cpu, ret; @@ -1904,13 +1904,17 @@ fail_unlock: fail: trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); - if (do_warn && warn_limit) { - pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", - size, align, is_atomic, err); - if (!is_atomic) - dump_stack(); - if (!--warn_limit) - pr_info("limit reached, disable warning\n"); + if (do_warn) { + int remaining = atomic_dec_if_positive(&warn_limit); + + if (remaining >= 0) { + pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", + size, align, is_atomic, err); + if (!is_atomic) + dump_stack(); + if (remaining == 0) + pr_info("limit reached, disable warning\n"); + } } if (is_atomic) { diff --git a/mm/swap.c b/mm/swap.c index 6dd22a904b37..b8cea6a1b86f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -164,6 +164,10 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn) for (i = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; + /* block memcg migration while the folio moves between lru */ + if (move_fn != lru_add && !folio_test_clear_lru(folio)) + continue; + folio_lruvec_relock_irqsave(folio, &lruvec, &flags); move_fn(lruvec, folio); @@ -176,14 +180,10 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn) } static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch, - struct folio *folio, move_fn_t move_fn, - bool on_lru, bool disable_irq) + struct folio *folio, move_fn_t move_fn, bool disable_irq) { unsigned long flags; - if (on_lru && !folio_test_clear_lru(folio)) - return; - folio_get(folio); if (disable_irq) @@ -191,8 +191,8 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch, else local_lock(&cpu_fbatches.lock); - if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || folio_test_large(folio) || - lru_cache_disabled()) + if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || + !folio_may_be_lru_cached(folio) || lru_cache_disabled()) folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn); if (disable_irq) @@ -201,13 +201,13 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch, local_unlock(&cpu_fbatches.lock); } -#define folio_batch_add_and_move(folio, op, on_lru) \ - __folio_batch_add_and_move( \ - &cpu_fbatches.op, \ - folio, \ - op, \ - on_lru, \ - offsetof(struct cpu_fbatches, op) >= offsetof(struct cpu_fbatches, lock_irq) \ +#define folio_batch_add_and_move(folio, op) \ + __folio_batch_add_and_move( \ + &cpu_fbatches.op, \ + folio, \ + op, \ + offsetof(struct cpu_fbatches, op) >= \ + offsetof(struct cpu_fbatches, lock_irq) \ ) static void lru_move_tail(struct lruvec *lruvec, struct folio *folio) @@ -231,10 +231,10 @@ static void lru_move_tail(struct lruvec *lruvec, struct folio *folio) void folio_rotate_reclaimable(struct folio *folio) { if (folio_test_locked(folio) || folio_test_dirty(folio) || - folio_test_unevictable(folio)) + folio_test_unevictable(folio) || !folio_test_lru(folio)) return; - folio_batch_add_and_move(folio, lru_move_tail, true); + folio_batch_add_and_move(folio, lru_move_tail); } void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file, @@ -328,10 +328,11 @@ static void folio_activate_drain(int cpu) void folio_activate(struct folio *folio) { - if (folio_test_active(folio) || folio_test_unevictable(folio)) + if (folio_test_active(folio) || folio_test_unevictable(folio) || + !folio_test_lru(folio)) return; - folio_batch_add_and_move(folio, lru_activate, true); + folio_batch_add_and_move(folio, lru_activate); } #else @@ -507,7 +508,7 @@ void folio_add_lru(struct folio *folio) lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) folio_set_active(folio); - folio_batch_add_and_move(folio, lru_add, false); + folio_batch_add_and_move(folio, lru_add); } EXPORT_SYMBOL(folio_add_lru); @@ -685,13 +686,13 @@ void lru_add_drain_cpu(int cpu) void deactivate_file_folio(struct folio *folio) { /* Deactivating an unevictable folio will not accelerate reclaim */ - if (folio_test_unevictable(folio)) + if (folio_test_unevictable(folio) || !folio_test_lru(folio)) return; if (lru_gen_enabled() && lru_gen_clear_refs(folio)) return; - folio_batch_add_and_move(folio, lru_deactivate_file, true); + folio_batch_add_and_move(folio, lru_deactivate_file); } /* @@ -704,13 +705,13 @@ void deactivate_file_folio(struct folio *folio) */ void folio_deactivate(struct folio *folio) { - if (folio_test_unevictable(folio)) + if (folio_test_unevictable(folio) || !folio_test_lru(folio)) return; if (lru_gen_enabled() ? lru_gen_clear_refs(folio) : !folio_test_active(folio)) return; - folio_batch_add_and_move(folio, lru_deactivate, true); + folio_batch_add_and_move(folio, lru_deactivate); } /** @@ -723,10 +724,11 @@ void folio_deactivate(struct folio *folio) void folio_mark_lazyfree(struct folio *folio) { if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) || + !folio_test_lru(folio) || folio_test_swapcache(folio) || folio_test_unevictable(folio)) return; - folio_batch_add_and_move(folio, lru_lazyfree, true); + folio_batch_add_and_move(folio, lru_lazyfree); } void lru_add_drain(void) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 710fe512368f..4249e1e01947 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2026,6 +2026,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, if (unlikely(!vmap_initialized)) return ERR_PTR(-EBUSY); + /* Only reclaim behaviour flags are relevant. */ + gfp_mask = gfp_mask & GFP_RECLAIM_MASK; might_sleep(); /* @@ -2038,8 +2040,6 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, */ va = node_alloc(size, align, vstart, vend, &addr, &vn_id); if (!va) { - gfp_mask = gfp_mask & GFP_RECLAIM_MASK; - va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node); if (unlikely(!va)) return ERR_PTR(-ENOMEM); @@ -2089,7 +2089,7 @@ retry: BUG_ON(va->va_start < vstart); BUG_ON(va->va_end > vend); - ret = kasan_populate_vmalloc(addr, size); + ret = kasan_populate_vmalloc(addr, size, gfp_mask); if (ret) { free_vmap_area(va); return ERR_PTR(ret); @@ -4845,7 +4845,7 @@ retry: /* populate the kasan shadow space */ for (area = 0; area < nr_vms; area++) { - if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) + if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area], GFP_KERNEL)) goto err_free_shadow; } diff --git a/mm/vmscan.c b/mm/vmscan.c index c88eb223ade2..ca9e1cd3cd68 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4500,7 +4500,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* ineligible */ - if (!folio_test_lru(folio) || zone > sc->reclaim_idx) { + if (zone > sc->reclaim_idx) { gen = folio_inc_gen(lruvec, folio, false); list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); return true; diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c index 7ebd352138e4..beaf36657dea 100644 --- a/samples/damon/mtier.c +++ b/samples/damon/mtier.c @@ -208,6 +208,9 @@ static int damon_sample_mtier_enable_store( if (enabled == is_enabled) return 0; + if (!init_called) + return 0; + if (enabled) { err = damon_sample_mtier_start(); if (err) diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c index 1b839c06a612..0226652f94d5 100644 --- a/samples/damon/prcl.c +++ b/samples/damon/prcl.c @@ -137,6 +137,9 @@ static int damon_sample_prcl_enable_store( if (enabled == is_enabled) return 0; + if (!init_called) + return 0; + if (enabled) { err = damon_sample_prcl_start(); if (err) diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c index da052023b099..21eaf15f987d 100644 --- a/samples/damon/wsse.c +++ b/samples/damon/wsse.c @@ -118,6 +118,9 @@ static int damon_sample_wsse_enable_store( return 0; if (enabled) { + if (!init_called) + return 0; + err = damon_sample_wsse_start(); if (err) enabled = false; |