diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/mntpt.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/util.h | 3 | ||||
-rw-r--r-- | fs/configfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/dax.c | 3 | ||||
-rw-r--r-- | fs/exec.c | 69 | ||||
-rw-r--r-- | fs/fuse/dir.c | 3 | ||||
-rw-r--r-- | fs/fuse/file.c | 4 | ||||
-rw-r--r-- | fs/jfs/jfs_metapage.c | 106 | ||||
-rw-r--r-- | fs/namespace.c | 24 | ||||
-rw-r--r-- | fs/nfs/namespace.c | 1 | ||||
-rw-r--r-- | fs/nilfs2/btree.c | 4 | ||||
-rw-r--r-- | fs/nilfs2/direct.c | 3 | ||||
-rw-r--r-- | fs/nilfs2/mdt.c | 2 | ||||
-rw-r--r-- | fs/nilfs2/segment.c | 16 | ||||
-rw-r--r-- | fs/nilfs2/segment.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/filecheck.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/quota_local.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/stackglue.c | 3 | ||||
-rw-r--r-- | fs/pipe.c | 3 | ||||
-rw-r--r-- | fs/proc/base.c | 12 | ||||
-rw-r--r-- | fs/proc/page.c | 161 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 29 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 4 | ||||
-rw-r--r-- | fs/smb/client/namespace.c | 1 | ||||
-rw-r--r-- | fs/squashfs/Kconfig | 21 | ||||
-rw-r--r-- | fs/squashfs/block.c | 28 | ||||
-rw-r--r-- | fs/squashfs/super.c | 5 | ||||
-rw-r--r-- | fs/super.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_zone_gc.c | 2 |
30 files changed, 268 insertions, 257 deletions
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 45cee6534122..9434a5399f2b 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -189,7 +189,6 @@ struct vfsmount *afs_d_automount(struct path *path) if (IS_ERR(newmnt)) return newmnt; - mntget(newmnt); /* prevent immediate expiration */ mnt_set_expiry(newmnt, &afs_vfsmounts); queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, afs_mntpt_expiry_timeout * HZ); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 25cf61ebd40c..0a4b1d433621 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -17,6 +17,7 @@ #include <linux/random.h> #include <linux/ratelimit.h> #include <linux/slab.h> +#include <linux/sort.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> @@ -672,8 +673,6 @@ static inline void percpu_memset(void __percpu *p, int c, size_t bytes) u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned); -#define cmp_int(l, r) ((l > r) - (l < r)) - static inline int u8_cmp(u8 l, u8 r) { return cmp_int(l, r); diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig index 272b64456999..1fcd761fe7be 100644 --- a/fs/configfs/Kconfig +++ b/fs/configfs/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config CONFIGFS_FS tristate "Userspace-driven configuration filesystem" - select SYSFS help configfs is a RAM-based filesystem that provides the converse of sysfs's functionality. Where sysfs is a filesystem-based @@ -1422,8 +1422,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf, pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); mm_inc_nr_ptes(vma->vm_mm); } - pmd_entry = mk_pmd(&zero_folio->page, vmf->vma->vm_page_prot); - pmd_entry = pmd_mkhuge(pmd_entry); + pmd_entry = folio_mk_pmd(zero_folio, vmf->vma->vm_page_prot); set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry); spin_unlock(ptl); trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry); diff --git a/fs/exec.c b/fs/exec.c index cfbb2b9ee3c9..1f5fdd2e096e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -78,6 +78,9 @@ #include <trace/events/sched.h> +/* For vma exec functions. */ +#include "../mm/internal.h" + static int bprm_creds_from_file(struct linux_binprm *bprm); int suid_dumpable = 0; @@ -182,60 +185,6 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, flush_cache_page(bprm->vma, pos, page_to_pfn(page)); } -static int __bprm_mm_init(struct linux_binprm *bprm) -{ - int err; - struct vm_area_struct *vma = NULL; - struct mm_struct *mm = bprm->mm; - - bprm->vma = vma = vm_area_alloc(mm); - if (!vma) - return -ENOMEM; - vma_set_anonymous(vma); - - if (mmap_write_lock_killable(mm)) { - err = -EINTR; - goto err_free; - } - - /* - * Need to be called with mmap write lock - * held, to avoid race with ksmd. - */ - err = ksm_execve(mm); - if (err) - goto err_ksm; - - /* - * Place the stack at the largest stack address the architecture - * supports. Later, we'll move this to an appropriate place. We don't - * use STACK_TOP because that can depend on attributes which aren't - * configured yet. - */ - BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); - vma->vm_end = STACK_TOP_MAX; - vma->vm_start = vma->vm_end - PAGE_SIZE; - vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP); - vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - - err = insert_vm_struct(mm, vma); - if (err) - goto err; - - mm->stack_vm = mm->total_vm = 1; - mmap_write_unlock(mm); - bprm->p = vma->vm_end - sizeof(void *); - return 0; -err: - ksm_exit(mm); -err_ksm: - mmap_write_unlock(mm); -err_free: - bprm->vma = NULL; - vm_area_free(vma); - return err; -} - static bool valid_arg_len(struct linux_binprm *bprm, long len) { return len <= MAX_ARG_STRLEN; @@ -288,12 +237,6 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos, { } -static int __bprm_mm_init(struct linux_binprm *bprm) -{ - bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *); - return 0; -} - static bool valid_arg_len(struct linux_binprm *bprm, long len) { return len <= bprm->p; @@ -322,9 +265,13 @@ static int bprm_mm_init(struct linux_binprm *bprm) bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK]; task_unlock(current->group_leader); - err = __bprm_mm_init(bprm); +#ifndef CONFIG_MMU + bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *); +#else + err = create_init_stack_vma(bprm->mm, &bprm->vma, &bprm->p); if (err) goto err; +#endif return 0; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 33b82529cb6e..7d7ed45cb3e9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -319,9 +319,6 @@ static struct vfsmount *fuse_dentry_automount(struct path *path) /* Create the submount */ mnt = fc_mount(fsc); - if (!IS_ERR(mnt)) - mntget(mnt); - put_fs_context(fsc); return mnt; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 754378dd9f71..6f19a4daa559 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -487,7 +487,7 @@ static inline bool fuse_folio_is_writeback(struct inode *inode, struct folio *folio) { pgoff_t last = folio_next_index(folio) - 1; - return fuse_range_is_writeback(inode, folio_index(folio), last); + return fuse_range_is_writeback(inode, folio->index, last); } static void fuse_wait_on_folio_writeback(struct inode *inode, @@ -2349,7 +2349,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio, return true; /* Discontinuity */ - if (data->orig_folios[ap->num_folios - 1]->index + 1 != folio_index(folio)) + if (data->orig_folios[ap->num_folios - 1]->index + 1 != folio->index) return true; /* Need to grow the pages array? If so, did the expansion fail? */ diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index df575a873ec6..9029cd216912 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -15,6 +15,7 @@ #include <linux/mempool.h> #include <linux/seq_file.h> #include <linux/writeback.h> +#include <linux/migrate.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -151,7 +152,59 @@ static inline void dec_io(struct folio *folio, blk_status_t status, handler(folio, anchor->status); } +#ifdef CONFIG_MIGRATION +static int __metapage_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, + enum migrate_mode mode) +{ + struct meta_anchor *src_anchor = src->private; + struct metapage *mps[MPS_PER_PAGE] = {0}; + struct metapage *mp; + int i, rc; + + for (i = 0; i < MPS_PER_PAGE; i++) { + mp = src_anchor->mp[i]; + if (mp && metapage_locked(mp)) + return -EAGAIN; + } + + rc = filemap_migrate_folio(mapping, dst, src, mode); + if (rc != MIGRATEPAGE_SUCCESS) + return rc; + + for (i = 0; i < MPS_PER_PAGE; i++) { + mp = src_anchor->mp[i]; + if (!mp) + continue; + if (unlikely(insert_metapage(dst, mp))) { + /* If error, roll-back previosly inserted pages */ + for (int j = 0 ; j < i; j++) { + if (mps[j]) + remove_metapage(dst, mps[j]); + } + return -EAGAIN; + } + mps[i] = mp; + } + + /* Update the metapage and remove it from src */ + for (i = 0; i < MPS_PER_PAGE; i++) { + mp = mps[i]; + if (mp) { + int page_offset = mp->data - folio_address(src); + + mp->data = folio_address(dst) + page_offset; + mp->folio = dst; + remove_metapage(src, mp); + } + } + + return MIGRATEPAGE_SUCCESS; +} +#endif /* CONFIG_MIGRATION */ + #else + static inline struct metapage *folio_to_mp(struct folio *folio, int offset) { return folio->private; @@ -175,6 +228,35 @@ static inline void remove_metapage(struct folio *folio, struct metapage *mp) #define inc_io(folio) do {} while(0) #define dec_io(folio, status, handler) handler(folio, status) +#ifdef CONFIG_MIGRATION +static int __metapage_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, + enum migrate_mode mode) +{ + struct metapage *mp; + int page_offset; + int rc; + + mp = folio_to_mp(src, 0); + if (metapage_locked(mp)) + return -EAGAIN; + + rc = filemap_migrate_folio(mapping, dst, src, mode); + if (rc != MIGRATEPAGE_SUCCESS) + return rc; + + if (unlikely(insert_metapage(dst, mp))) + return -EAGAIN; + + page_offset = mp->data - folio_address(src); + mp->data = folio_address(dst) + page_offset; + mp->folio = dst; + remove_metapage(src, mp); + + return MIGRATEPAGE_SUCCESS; +} +#endif /* CONFIG_MIGRATION */ + #endif static inline struct metapage *alloc_metapage(gfp_t gfp_mask) @@ -554,6 +636,29 @@ static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask) return ret; } +#ifdef CONFIG_MIGRATION +/* + * metapage_migrate_folio - Migration function for JFS metapages + */ +static int metapage_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, + enum migrate_mode mode) +{ + int expected_count; + + if (!src->private) + return filemap_migrate_folio(mapping, dst, src, mode); + + /* Check whether page does not have extra refs before we do more work */ + expected_count = folio_expected_ref_count(src) + 1; + if (folio_ref_count(src) != expected_count) + return -EAGAIN; + return __metapage_migrate_folio(mapping, dst, src, mode); +} +#else +#define metapage_migrate_folio NULL +#endif /* CONFIG_MIGRATION */ + static void metapage_invalidate_folio(struct folio *folio, size_t offset, size_t length) { @@ -570,6 +675,7 @@ const struct address_space_operations jfs_metapage_aops = { .release_folio = metapage_release_folio, .invalidate_folio = metapage_invalidate_folio, .dirty_folio = filemap_dirty_folio, + .migrate_folio = metapage_migrate_folio, }; struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, diff --git a/fs/namespace.c b/fs/namespace.c index 7c0ebc4f4ef2..2f2e93927f46 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1326,21 +1326,6 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type, } EXPORT_SYMBOL_GPL(vfs_kern_mount); -struct vfsmount * -vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, - const char *name, void *data) -{ - /* Until it is worked out how to pass the user namespace - * through from the parent mount to the submount don't support - * unprivileged mounts with submounts. - */ - if (mountpoint->d_sb->s_user_ns != &init_user_ns) - return ERR_PTR(-EPERM); - - return vfs_kern_mount(type, SB_SUBMOUNT, name, data); -} -EXPORT_SYMBOL_GPL(vfs_submount); - static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { @@ -3889,10 +3874,6 @@ int finish_automount(struct vfsmount *m, const struct path *path) return PTR_ERR(m); mnt = real_mount(m); - /* The new mount record should have at least 2 refs to prevent it being - * expired before we get a chance to add it - */ - BUG_ON(mnt_get_count(mnt) < 2); if (m->mnt_sb == path->mnt->mnt_sb && m->mnt_root == dentry) { @@ -3925,7 +3906,6 @@ int finish_automount(struct vfsmount *m, const struct path *path) unlock_mount(mp); if (unlikely(err)) goto discard; - mntput(m); return 0; discard_locked: @@ -3939,7 +3919,6 @@ discard: namespace_unlock(); } mntput(m); - mntput(m); return err; } @@ -3976,11 +3955,14 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* extract from the expiration list every vfsmount that matches the * following criteria: + * - already mounted * - only referenced by its parent vfsmount * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { + if (!is_mounted(&mnt->mnt)) + continue; if (!xchg(&mnt->mnt_expiry_mark, 1) || propagate_mount_busy(mnt, 1)) continue; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 973aed9cc5fe..7f1ec9c67ff2 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -195,7 +195,6 @@ struct vfsmount *nfs_d_automount(struct path *path) if (IS_ERR(mnt)) goto out_fc; - mntget(mnt); /* prevent immediate expiration */ if (timeout <= 0) goto out_fc; diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 0d8f7fb15c2e..dd0c8e560ef6 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2102,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) + if (unlikely(ret == -ENOENT)) { nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 893ab36824cc..2d8dc6b35b54 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 2f850a18d6e7..946b0d3534a5 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -422,8 +422,6 @@ static int nilfs_mdt_write_folio(struct folio *folio, if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); - else if (wbc->for_reclaim) - nilfs_flush_segment(sb, inode->i_ino); return err; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 83970d97840b..61a4141f8d6b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2221,22 +2221,6 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn) spin_unlock(&sci->sc_state_lock); } -/** - * nilfs_flush_segment - trigger a segment construction for resource control - * @sb: super block - * @ino: inode number of the file to be flushed out. - */ -void nilfs_flush_segment(struct super_block *sb, ino_t ino) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - struct nilfs_sc_info *sci = nilfs->ns_writer; - - if (!sci || nilfs_doing_construction()) - return; - nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0); - /* assign bit 0 to data files */ -} - struct nilfs_segctor_wait_request { wait_queue_entry_t wq; __u32 seq; diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index f723f47ddc4e..4b39ed43ae72 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -226,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *); extern int nilfs_construct_segment(struct super_block *); extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *, loff_t, loff_t); -extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index fce9beb214f0..43e652a2adaf 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1483,7 +1483,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work) sc_put(sc); } -/* socket shutdown does a del_timer_sync against this as it tears down. +/* socket shutdown does a timer_delete_sync against this as it tears down. * we can't start this timer until we've got to the point in sc buildup * where shutdown is going to be involved */ static void o2net_idle_timer(struct timer_list *t) diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c index 1ad7106741f8..3ad7baf67658 100644 --- a/fs/ocfs2/filecheck.c +++ b/fs/ocfs2/filecheck.c @@ -505,5 +505,5 @@ static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj, ocfs2_filecheck_handle_entry(ent, entry); exit: - return (!ret ? count : ret); + return ret ?: count; } diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index e272429da3db..de7f12858729 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -674,7 +674,7 @@ out_put: break; } out: - kfree(rec); + ocfs2_free_quota_recovery(rec); return status; } diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index ddd761cf44c8..a28c127b9934 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -691,8 +691,7 @@ static void __exit ocfs2_stack_glue_exit(void) memset(&locking_max_version, 0, sizeof(struct ocfs2_protocol_version)); ocfs2_sysfs_exit(); - if (ocfs2_table_header) - unregister_sysctl_table(ocfs2_table_header); + unregister_sysctl_table(ocfs2_table_header); } MODULE_AUTHOR("Oracle"); diff --git a/fs/pipe.c b/fs/pipe.c index da45edd68c41..45077c37bad1 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -26,6 +26,7 @@ #include <linux/memcontrol.h> #include <linux/watch_queue.h> #include <linux/sysctl.h> +#include <linux/sort.h> #include <linux/uaccess.h> #include <asm/ioctls.h> @@ -76,8 +77,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 */ -#define cmp_int(l, r) ((l > r) - (l < r)) - #ifdef CONFIG_PROVE_LOCKING static int pipe_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) diff --git a/fs/proc/base.c b/fs/proc/base.c index fe33a5843fbd..c667702dc69b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -827,7 +827,13 @@ static const struct file_operations proc_single_file_operations = { .release = single_release, }; - +/* + * proc_mem_open() can return errno, NULL or mm_struct*. + * + * - Returns NULL if the task has no mm (PF_KTHREAD or PF_EXITING) + * - Returns mm_struct* on success + * - Returns error code on failure + */ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { struct task_struct *task = get_proc_task(inode); @@ -854,8 +860,8 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { struct mm_struct *mm = proc_mem_open(inode, mode); - if (IS_ERR(mm)) - return PTR_ERR(mm); + if (IS_ERR_OR_NULL(mm)) + return mm ? PTR_ERR(mm) : -ESRCH; file->private_data = mm; return 0; diff --git a/fs/proc/page.c b/fs/proc/page.c index 23fc771100ae..999af26c7298 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -22,6 +22,12 @@ #define KPMMASK (KPMSIZE - 1) #define KPMBITS (KPMSIZE * BITS_PER_BYTE) +enum kpage_operation { + KPAGE_FLAGS, + KPAGE_COUNT, + KPAGE_CGROUP, +}; + static inline unsigned long get_max_dump_pfn(void) { #ifdef CONFIG_SPARSEMEM @@ -37,19 +43,17 @@ static inline unsigned long get_max_dump_pfn(void) #endif } -/* /proc/kpagecount - an array exposing page mapcounts - * - * Each entry is a u64 representing the corresponding - * physical page mapcount. - */ -static ssize_t kpagecount_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t kpage_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos, + enum kpage_operation op) { const unsigned long max_dump_pfn = get_max_dump_pfn(); u64 __user *out = (u64 __user *)buf; + struct page *page; unsigned long src = *ppos; unsigned long pfn; ssize_t ret = 0; + u64 info; pfn = src / KPMSIZE; if (src & KPMMASK || count & KPMMASK) @@ -59,24 +63,34 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); while (count > 0) { - struct page *page; - u64 mapcount = 0; - /* * TODO: ZONE_DEVICE support requires to identify * memmaps that were actually initialized. */ page = pfn_to_online_page(pfn); - if (page) { - struct folio *folio = page_folio(page); - if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) - mapcount = folio_precise_page_mapcount(folio, page); - else - mapcount = folio_average_page_mapcount(folio); - } - - if (put_user(mapcount, out)) { + if (page) { + switch (op) { + case KPAGE_FLAGS: + info = stable_page_flags(page); + break; + case KPAGE_COUNT: + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + info = folio_precise_page_mapcount(page_folio(page), page); + else + info = folio_average_page_mapcount(page_folio(page)); + break; + case KPAGE_CGROUP: + info = page_cgroup_ino(page); + break; + default: + info = 0; + break; + } + } else + info = 0; + + if (put_user(info, out)) { ret = -EFAULT; break; } @@ -94,17 +108,23 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, return ret; } +/* /proc/kpagecount - an array exposing page mapcounts + * + * Each entry is a u64 representing the corresponding + * physical page mapcount. + */ +static ssize_t kpagecount_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return kpage_read(file, buf, count, ppos, KPAGE_COUNT); +} + static const struct proc_ops kpagecount_proc_ops = { .proc_flags = PROC_ENTRY_PERMANENT, .proc_lseek = mem_lseek, .proc_read = kpagecount_read, }; -/* /proc/kpageflags - an array exposing page flags - * - * Each entry is a u64 representing the corresponding - * physical page flags. - */ static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit) { @@ -225,47 +245,17 @@ u64 stable_page_flags(const struct page *page) #endif return u; -}; +} +/* /proc/kpageflags - an array exposing page flags + * + * Each entry is a u64 representing the corresponding + * physical page flags. + */ static ssize_t kpageflags_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { - const unsigned long max_dump_pfn = get_max_dump_pfn(); - u64 __user *out = (u64 __user *)buf; - unsigned long src = *ppos; - unsigned long pfn; - ssize_t ret = 0; - - pfn = src / KPMSIZE; - if (src & KPMMASK || count & KPMMASK) - return -EINVAL; - if (src >= max_dump_pfn * KPMSIZE) - return 0; - count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); - - while (count > 0) { - /* - * TODO: ZONE_DEVICE support requires to identify - * memmaps that were actually initialized. - */ - struct page *page = pfn_to_online_page(pfn); - - if (put_user(stable_page_flags(page), out)) { - ret = -EFAULT; - break; - } - - pfn++; - out++; - count -= KPMSIZE; - - cond_resched(); - } - - *ppos += (char __user *)out - buf; - if (!ret) - ret = (char __user *)out - buf; - return ret; + return kpage_read(file, buf, count, ppos, KPAGE_FLAGS); } static const struct proc_ops kpageflags_proc_ops = { @@ -276,53 +266,10 @@ static const struct proc_ops kpageflags_proc_ops = { #ifdef CONFIG_MEMCG static ssize_t kpagecgroup_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { - const unsigned long max_dump_pfn = get_max_dump_pfn(); - u64 __user *out = (u64 __user *)buf; - struct page *ppage; - unsigned long src = *ppos; - unsigned long pfn; - ssize_t ret = 0; - u64 ino; - - pfn = src / KPMSIZE; - if (src & KPMMASK || count & KPMMASK) - return -EINVAL; - if (src >= max_dump_pfn * KPMSIZE) - return 0; - count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src); - - while (count > 0) { - /* - * TODO: ZONE_DEVICE support requires to identify - * memmaps that were actually initialized. - */ - ppage = pfn_to_online_page(pfn); - - if (ppage) - ino = page_cgroup_ino(ppage); - else - ino = 0; - - if (put_user(ino, out)) { - ret = -EFAULT; - break; - } - - pfn++; - out++; - count -= KPMSIZE; - - cond_resched(); - } - - *ppos += (char __user *)out - buf; - if (!ret) - ret = (char __user *)out - buf; - return ret; + return kpage_read(file, buf, count, ppos, KPAGE_CGROUP); } - static const struct proc_ops kpagecgroup_proc_ops = { .proc_flags = PROC_ENTRY_PERMANENT, .proc_lseek = mem_lseek, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 994cde10e3f4..27972c0749e7 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -212,8 +212,8 @@ static int proc_maps_open(struct inode *inode, struct file *file, priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR(priv->mm)) { - int err = PTR_ERR(priv->mm); + if (IS_ERR_OR_NULL(priv->mm)) { + int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; seq_release_private(inode, file); return err; @@ -1325,8 +1325,8 @@ static int smaps_rollup_open(struct inode *inode, struct file *file) priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR(priv->mm)) { - ret = PTR_ERR(priv->mm); + if (IS_ERR_OR_NULL(priv->mm)) { + ret = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; single_release(inode, file); goto out_free; @@ -2069,8 +2069,8 @@ static int pagemap_open(struct inode *inode, struct file *file) struct mm_struct *mm; mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR(mm)) - return PTR_ERR(mm); + if (IS_ERR_OR_NULL(mm)) + return mm ? PTR_ERR(mm) : -ESRCH; file->private_data = mm; return 0; } @@ -2087,7 +2087,8 @@ static int pagemap_release(struct inode *inode, struct file *file) #define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \ PAGE_IS_FILE | PAGE_IS_PRESENT | \ PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \ - PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY) + PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY | \ + PAGE_IS_GUARD) #define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC) struct pagemap_scan_private { @@ -2128,12 +2129,14 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; - if (p->masks_of_interest & PAGE_IS_FILE) { - swp = pte_to_swp_entry(pte); - if (is_pfn_swap_entry(swp) && - !folio_test_anon(pfn_swap_entry_folio(swp))) - categories |= PAGE_IS_FILE; - } + swp = pte_to_swp_entry(pte); + if (is_guard_swp_entry(swp)) + categories |= PAGE_IS_GUARD; + else if ((p->masks_of_interest & PAGE_IS_FILE) && + is_pfn_swap_entry(swp) && + !folio_test_anon(pfn_swap_entry_folio(swp))) + categories |= PAGE_IS_FILE; + if (pte_swp_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index bce674533000..59bfd61d653a 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -260,8 +260,8 @@ static int maps_open(struct inode *inode, struct file *file, priv->inode = inode; priv->mm = proc_mem_open(inode, PTRACE_MODE_READ); - if (IS_ERR(priv->mm)) { - int err = PTR_ERR(priv->mm); + if (IS_ERR_OR_NULL(priv->mm)) { + int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH; seq_release_private(inode, file); return err; diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index e3f9213131c4..778daf11f1db 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -283,7 +283,6 @@ struct vfsmount *cifs_d_automount(struct path *path) return newmnt; } - mntget(newmnt); /* prevent immediate expiration */ mnt_set_expiry(newmnt, &cifs_automount_list); schedule_delayed_work(&cifs_automount_task, cifs_mountpoint_expiry_timeout); diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index b1091e70434a..a9602aae21ef 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -149,6 +149,27 @@ config SQUASHFS_XATTR If unsure, say N. +config SQUASHFS_COMP_CACHE_FULL + bool "Enable full caching of compressed blocks" + depends on SQUASHFS + default n + help + This option enables caching of all compressed blocks, Without caching, + repeated reads of the same files trigger excessive disk I/O, significantly + reducinng performance in workloads like fio-based benchmarks. + + For example, fio tests (iodepth=1, numjobs=1, ioengine=psync) show: + With caching: IOPS=2223, BW=278MiB/s (291MB/s) + Without caching: IOPS=815, BW=102MiB/s (107MB/s) + + Enabling this option restores performance to pre-regression levels by + caching all compressed blocks in the page cache, reducing disk I/O for + repeated reads. However, this increases memory usage, which may be a + concern in memory-constrained environments. + + Enable this option if your workload involves frequent repeated reads and + memory usage is not a limiting factor. If unsure, say N. + config SQUASHFS_ZLIB bool "Include support for ZLIB compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 2dc730800f44..3061043e915c 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -88,6 +88,10 @@ static int squashfs_bio_read_cached(struct bio *fullbio, struct bio_vec *bv; int idx = 0; int err = 0; +#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL + struct page **cache_pages = kmalloc_array(page_count, + sizeof(void *), GFP_KERNEL | __GFP_ZERO); +#endif bio_for_each_segment_all(bv, fullbio, iter_all) { struct page *page = bv->bv_page; @@ -110,6 +114,11 @@ static int squashfs_bio_read_cached(struct bio *fullbio, head_to_cache = page; else if (idx == page_count - 1 && index + length != read_end) tail_to_cache = page; +#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL + /* Cache all pages in the BIO for repeated reads */ + else if (cache_pages) + cache_pages[idx] = page; +#endif if (!bio || idx != end_idx) { struct bio *new = bio_alloc_clone(bdev, fullbio, @@ -163,6 +172,25 @@ static int squashfs_bio_read_cached(struct bio *fullbio, } } +#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL + if (!cache_pages) + goto out; + + for (idx = 0; idx < page_count; idx++) { + if (!cache_pages[idx]) + continue; + int ret = add_to_page_cache_lru(cache_pages[idx], cache_mapping, + (read_start >> PAGE_SHIFT) + idx, + GFP_NOIO); + + if (!ret) { + SetPageUptodate(cache_pages[idx]); + unlock_page(cache_pages[idx]); + } + } + kfree(cache_pages); +out: +#endif return 0; } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 67c55fe32ce8..992ea0e37257 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -202,6 +202,11 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) msblk->panic_on_errors = (opts->errors == Opt_errors_panic); msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); + if (!msblk->devblksize) { + errorf(fc, "squashfs: unable to set blocksize\n"); + return -EINVAL; + } + msblk->devblksize_log2 = ffz(~msblk->devblksize); mutex_init(&msblk->meta_index_mutex); diff --git a/fs/super.c b/fs/super.c index bcc4e87123c8..21799e213fd7 100644 --- a/fs/super.c +++ b/fs/super.c @@ -824,13 +824,6 @@ struct super_block *sget(struct file_system_type *type, struct super_block *old; int err; - /* We don't yet pass the user namespace of the parent - * mount through to here so always use &init_user_ns - * until that changes. - */ - if (flags & SB_SUBMOUNT) - user_ns = &init_user_ns; - retry: spin_lock(&sb_lock); if (test) { @@ -850,7 +843,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns); + s = alloc_super(type, flags, user_ns); if (!s) return ERR_PTR(-ENOMEM); goto retry; diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index d613a4094db6..9c00fc5baa30 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -290,8 +290,6 @@ xfs_zone_gc_query_cb( return 0; } -#define cmp_int(l, r) ((l > r) - (l < r)) - static int xfs_zone_gc_rmap_rec_cmp( const void *a, |