summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c5
-rw-r--r--fs/afs/mntpt.c1
-rw-r--r--fs/afs/write.c9
-rw-r--r--fs/bcachefs/util.h3
-rw-r--r--fs/cachefiles/io.c16
-rw-r--r--fs/ceph/addr.c6
-rw-r--r--fs/configfs/Kconfig1
-rw-r--r--fs/dax.c5
-rw-r--r--fs/erofs/fscache.c6
-rw-r--r--fs/exec.c69
-rw-r--r--fs/fuse/dev.c182
-rw-r--r--fs/fuse/dev_uring.c34
-rw-r--r--fs/fuse/dir.c49
-rw-r--r--fs/fuse/file.c474
-rw-r--r--fs/fuse/fuse_dev_i.h9
-rw-r--r--fs/fuse/fuse_i.h10
-rw-r--r--fs/fuse/inode.c11
-rw-r--r--fs/fuse/readdir.c36
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/sys.c1
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/jfs/jfs_metapage.c106
-rw-r--r--fs/mount.h5
-rw-r--r--fs/namespace.c39
-rw-r--r--fs/netfs/buffered_read.c56
-rw-r--r--fs/netfs/buffered_write.c5
-rw-r--r--fs/netfs/direct_read.c16
-rw-r--r--fs/netfs/direct_write.c12
-rw-r--r--fs/netfs/fscache_io.c10
-rw-r--r--fs/netfs/internal.h42
-rw-r--r--fs/netfs/main.c1
-rw-r--r--fs/netfs/misc.c219
-rw-r--r--fs/netfs/objects.c48
-rw-r--r--fs/netfs/read_collect.c199
-rw-r--r--fs/netfs/read_pgpriv2.c4
-rw-r--r--fs/netfs/read_retry.c26
-rw-r--r--fs/netfs/read_single.c6
-rw-r--r--fs/netfs/write_collect.c83
-rw-r--r--fs/netfs/write_issue.c38
-rw-r--r--fs/netfs/write_retry.c19
-rw-r--r--fs/nfs/client.c6
-rw-r--r--fs/nfs/delegation.c25
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfs/fscache.c1
-rw-r--r--fs/nfs/inode.c51
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/localio.c51
-rw-r--r--fs/nfs/namespace.c1
-rw-r--r--fs/nfs/nfs42.h1
-rw-r--r--fs/nfs/nfs42proc.c29
-rw-r--r--fs/nfs/nfs42xdr.c64
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4file.c10
-rw-r--r--fs/nfs/nfs4proc.c75
-rw-r--r--fs/nfs/nfs4xdr.c1
-rw-r--r--fs/nfs/pnfs_nfs.c11
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfs/super.c19
-rw-r--r--fs/nfs/sysfs.c28
-rw-r--r--fs/nfs/write.c54
-rw-r--r--fs/nfs_common/nfslocalio.c99
-rw-r--r--fs/nfsd/filecache.c32
-rw-r--r--fs/nfsd/filecache.h3
-rw-r--r--fs/nfsd/localio.c70
-rw-r--r--fs/nilfs2/btree.c4
-rw-r--r--fs/nilfs2/direct.c3
-rw-r--r--fs/nilfs2/mdt.c2
-rw-r--r--fs/nilfs2/segment.c16
-rw-r--r--fs/nilfs2/segment.h1
-rw-r--r--fs/ntfs3/file.c31
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/filecheck.c2
-rw-r--r--fs/ocfs2/quota_local.c2
-rw-r--r--fs/ocfs2/stackglue.c3
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/pnode.c4
-rw-r--r--fs/proc/base.c12
-rw-r--r--fs/proc/page.c161
-rw-r--r--fs/proc/task_mmu.c29
-rw-r--r--fs/proc/task_nommu.c4
-rw-r--r--fs/smb/client/cached_dir.c24
-rw-r--r--fs/smb/client/cifsfs.c1
-rw-r--r--fs/smb/client/cifsglob.h18
-rw-r--r--fs/smb/client/cifspdu.h6
-rw-r--r--fs/smb/client/cifsproto.h3
-rw-r--r--fs/smb/client/cifssmb.c25
-rw-r--r--fs/smb/client/connect.c10
-rw-r--r--fs/smb/client/dir.c23
-rw-r--r--fs/smb/client/file.c10
-rw-r--r--fs/smb/client/misc.c8
-rw-r--r--fs/smb/client/namespace.c4
-rw-r--r--fs/smb/client/sess.c25
-rw-r--r--fs/smb/client/smb2ops.c7
-rw-r--r--fs/smb/client/smb2pdu.c82
-rw-r--r--fs/smb/client/smb2proto.h3
-rw-r--r--fs/squashfs/Kconfig21
-rw-r--r--fs/squashfs/block.c28
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/super.c9
-rw-r--r--fs/ufs/super.c307
-rw-r--r--fs/ufs/ufs.h9
-rw-r--r--fs/xfs/xfs_aops.c22
-rw-r--r--fs/xfs/xfs_zone_gc.c2
104 files changed, 1801 insertions, 1639 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 1286d96a29bc..862164181bac 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -59,7 +59,7 @@ static void v9fs_issue_write(struct netfs_io_subrequest *subreq)
len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err);
if (len > 0)
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- netfs_write_subrequest_terminated(subreq, len ?: err, false);
+ netfs_write_subrequest_terminated(subreq, len ?: err);
}
/**
@@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
/* if we just extended the file size, any portion not in
* cache won't be on server and is zeroes */
- if (subreq->rreq->origin != NETFS_DIO_READ)
+ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (pos + total >= i_size_read(rreq->inode))
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 45cee6534122..9434a5399f2b 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -189,7 +189,6 @@ struct vfsmount *afs_d_automount(struct path *path)
if (IS_ERR(newmnt))
return newmnt;
- mntget(newmnt); /* prevent immediate expiration */
mnt_set_expiry(newmnt, &afs_vfsmounts);
queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
afs_mntpt_expiry_timeout * HZ);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 18b0a9f1615e..2e7526ea883a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -120,17 +120,17 @@ static void afs_issue_write_worker(struct work_struct *work)
#if 0 // Error injection
if (subreq->debug_index == 3)
- return netfs_write_subrequest_terminated(subreq, -ENOANO, false);
+ return netfs_write_subrequest_terminated(subreq, -ENOANO);
if (!subreq->retry_count) {
set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN);
}
#endif
op = afs_alloc_operation(wreq->netfs_priv, vnode->volume);
if (IS_ERR(op))
- return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN);
afs_op_set_vnode(op, 0, vnode);
op->file[0].dv_delta = 1;
@@ -166,7 +166,7 @@ static void afs_issue_write_worker(struct work_struct *work)
break;
}
- netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false);
+ netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len);
}
void afs_issue_write(struct netfs_io_subrequest *subreq)
@@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st
case NETFS_READ_GAPS:
case NETFS_READ_SINGLE:
case NETFS_READ_FOR_WRITE:
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
return;
default:
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 25cf61ebd40c..0a4b1d433621 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -17,6 +17,7 @@
#include <linux/random.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
@@ -672,8 +673,6 @@ static inline void percpu_memset(void __percpu *p, int c, size_t bytes)
u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
-#define cmp_int(l, r) ((l > r) - (l < r))
-
static inline int u8_cmp(u8 l, u8 r)
{
return cmp_int(l, r);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 92058ae43488..c08e4a66ac07 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -63,7 +63,7 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret)
ret = -ESTALE;
}
- ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ ki->term_func(ki->term_func_priv, ret);
}
cachefiles_put_kiocb(ki);
@@ -188,7 +188,7 @@ in_progress:
presubmission_error:
if (term_func)
- term_func(term_func_priv, ret < 0 ? ret : skipped, false);
+ term_func(term_func_priv, ret < 0 ? ret : skipped);
return ret;
}
@@ -271,7 +271,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
if (ki->term_func)
- ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ ki->term_func(ki->term_func_priv, ret);
cachefiles_put_kiocb(ki);
}
@@ -301,7 +301,7 @@ int __cachefiles_write(struct cachefiles_object *object,
ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
if (!ki) {
if (term_func)
- term_func(term_func_priv, -ENOMEM, false);
+ term_func(term_func_priv, -ENOMEM);
return -ENOMEM;
}
@@ -366,7 +366,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
{
if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
if (term_func)
- term_func(term_func_priv, -ENOBUFS, false);
+ term_func(term_func_priv, -ENOBUFS);
trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite);
return -ENOBUFS;
}
@@ -665,7 +665,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
pre = CACHEFILES_DIO_BLOCK_SIZE - off;
if (pre >= len) {
fscache_count_dio_misfit();
- netfs_write_subrequest_terminated(subreq, len, false);
+ netfs_write_subrequest_terminated(subreq, len);
return;
}
subreq->transferred += pre;
@@ -691,7 +691,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
len -= post;
if (len == 0) {
fscache_count_dio_misfit();
- netfs_write_subrequest_terminated(subreq, post, false);
+ netfs_write_subrequest_terminated(subreq, post);
return;
}
iov_iter_truncate(&subreq->io_iter, len);
@@ -703,7 +703,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
&start, &len, len, true);
cachefiles_end_secure(cache, saved_cred);
if (ret < 0) {
- netfs_write_subrequest_terminated(subreq, ret, false);
+ netfs_write_subrequest_terminated(subreq, ret);
return;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 29be367905a1..b95c4cb21c13 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -238,6 +238,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
if (sparse && err > 0)
err = ceph_sparse_ext_map_end(op);
if (err < subreq->len &&
+ subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (IS_ENCRYPTED(inode) && err > 0) {
@@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
size_t len;
int mode;
- if (rreq->origin != NETFS_DIO_READ)
+ if (rreq->origin != NETFS_UNBUFFERED_READ &&
+ rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
@@ -539,7 +541,7 @@ static void ceph_set_page_fscache(struct page *page)
folio_start_private_2(page_folio(page)); /* [DEPRECATED] */
}
-static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
+static void ceph_fscache_write_terminated(void *priv, ssize_t error)
{
struct inode *inode = priv;
diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig
index 272b64456999..1fcd761fe7be 100644
--- a/fs/configfs/Kconfig
+++ b/fs/configfs/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem"
- select SYSFS
help
configfs is a RAM-based filesystem that provides the converse
of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/dax.c b/fs/dax.c
index 676303419e9e..ea0c35794bf9 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry)
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
- xas_pause(xas);
+ xas_reset(xas);
xas_unlock_irq(xas);
schedule();
finish_wait(wq, &ewait.wait);
@@ -1422,8 +1422,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
mm_inc_nr_ptes(vma->vm_mm);
}
- pmd_entry = mk_pmd(&zero_folio->page, vmf->vma->vm_page_prot);
- pmd_entry = pmd_mkhuge(pmd_entry);
+ pmd_entry = folio_mk_pmd(zero_folio, vmf->vma->vm_page_prot);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 9c9129bca346..34517ca9df91 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -102,8 +102,7 @@ static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
erofs_fscache_req_put(req);
}
-static void erofs_fscache_req_end_io(void *priv,
- ssize_t transferred_or_error, bool was_async)
+static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error)
{
struct erofs_fscache_io *io = priv;
struct erofs_fscache_rq *req = io->private;
@@ -180,8 +179,7 @@ struct erofs_fscache_bio {
struct bio_vec bvecs[BIO_MAX_VECS];
};
-static void erofs_fscache_bio_endio(void *priv,
- ssize_t transferred_or_error, bool was_async)
+static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error)
{
struct erofs_fscache_bio *io = priv;
diff --git a/fs/exec.c b/fs/exec.c
index cfbb2b9ee3c9..1f5fdd2e096e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -78,6 +78,9 @@
#include <trace/events/sched.h>
+/* For vma exec functions. */
+#include "../mm/internal.h"
+
static int bprm_creds_from_file(struct linux_binprm *bprm);
int suid_dumpable = 0;
@@ -182,60 +185,6 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
flush_cache_page(bprm->vma, pos, page_to_pfn(page));
}
-static int __bprm_mm_init(struct linux_binprm *bprm)
-{
- int err;
- struct vm_area_struct *vma = NULL;
- struct mm_struct *mm = bprm->mm;
-
- bprm->vma = vma = vm_area_alloc(mm);
- if (!vma)
- return -ENOMEM;
- vma_set_anonymous(vma);
-
- if (mmap_write_lock_killable(mm)) {
- err = -EINTR;
- goto err_free;
- }
-
- /*
- * Need to be called with mmap write lock
- * held, to avoid race with ksmd.
- */
- err = ksm_execve(mm);
- if (err)
- goto err_ksm;
-
- /*
- * Place the stack at the largest stack address the architecture
- * supports. Later, we'll move this to an appropriate place. We don't
- * use STACK_TOP because that can depend on attributes which aren't
- * configured yet.
- */
- BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
- vma->vm_end = STACK_TOP_MAX;
- vma->vm_start = vma->vm_end - PAGE_SIZE;
- vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
- vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-
- err = insert_vm_struct(mm, vma);
- if (err)
- goto err;
-
- mm->stack_vm = mm->total_vm = 1;
- mmap_write_unlock(mm);
- bprm->p = vma->vm_end - sizeof(void *);
- return 0;
-err:
- ksm_exit(mm);
-err_ksm:
- mmap_write_unlock(mm);
-err_free:
- bprm->vma = NULL;
- vm_area_free(vma);
- return err;
-}
-
static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
return len <= MAX_ARG_STRLEN;
@@ -288,12 +237,6 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
{
}
-static int __bprm_mm_init(struct linux_binprm *bprm)
-{
- bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
- return 0;
-}
-
static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
return len <= bprm->p;
@@ -322,9 +265,13 @@ static int bprm_mm_init(struct linux_binprm *bprm)
bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
task_unlock(current->group_leader);
- err = __bprm_mm_init(bprm);
+#ifndef CONFIG_MMU
+ bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
+#else
+ err = create_init_stack_vma(bprm->mm, &bprm->vma, &bprm->p);
if (err)
goto err;
+#endif
return 0;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6dcbaa218b7a..e80cd8f2c049 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -23,6 +23,7 @@
#include <linux/swap.h>
#include <linux/splice.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#define CREATE_TRACE_POINTS
#include "fuse_trace.h"
@@ -45,7 +46,7 @@ bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list)
return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
}
-bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
+static bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
{
int i;
@@ -816,7 +817,7 @@ static int unlock_request(struct fuse_req *req)
return err;
}
-void fuse_copy_init(struct fuse_copy_state *cs, int write,
+void fuse_copy_init(struct fuse_copy_state *cs, bool write,
struct iov_iter *iter)
{
memset(cs, 0, sizeof(*cs));
@@ -955,10 +956,10 @@ static int fuse_check_folio(struct folio *folio)
* folio that was originally in @pagep will lose a reference and the new
* folio returned in @pagep will carry a reference.
*/
-static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
+static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop)
{
int err;
- struct folio *oldfolio = page_folio(*pagep);
+ struct folio *oldfolio = *foliop;
struct folio *newfolio;
struct pipe_buffer *buf = cs->pipebufs;
@@ -979,7 +980,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
cs->pipebufs++;
cs->nr_segs--;
- if (cs->len != PAGE_SIZE)
+ if (cs->len != folio_size(oldfolio))
goto out_fallback;
if (!pipe_buf_try_steal(cs->pipe, buf))
@@ -1025,7 +1026,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (test_bit(FR_ABORTED, &cs->req->flags))
err = -ENOENT;
else
- *pagep = &newfolio->page;
+ *foliop = newfolio;
spin_unlock(&cs->req->waitq.lock);
if (err) {
@@ -1058,8 +1059,8 @@ out_fallback:
goto out_put_old;
}
-static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
- unsigned offset, unsigned count)
+static int fuse_ref_folio(struct fuse_copy_state *cs, struct folio *folio,
+ unsigned offset, unsigned count)
{
struct pipe_buffer *buf;
int err;
@@ -1067,17 +1068,17 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;
- get_page(page);
+ folio_get(folio);
err = unlock_request(cs->req);
if (err) {
- put_page(page);
+ folio_put(folio);
return err;
}
fuse_copy_finish(cs);
buf = cs->pipebufs;
- buf->page = page;
+ buf->page = &folio->page;
buf->offset = offset;
buf->len = count;
@@ -1089,20 +1090,24 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
}
/*
- * Copy a page in the request to/from the userspace buffer. Must be
+ * Copy a folio in the request to/from the userspace buffer. Must be
* done atomically
*/
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
- unsigned offset, unsigned count, int zeroing)
+static int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop,
+ unsigned offset, unsigned count, int zeroing)
{
int err;
- struct page *page = *pagep;
+ struct folio *folio = *foliop;
+ size_t size;
- if (page && zeroing && count < PAGE_SIZE)
- clear_highpage(page);
+ if (folio) {
+ size = folio_size(folio);
+ if (zeroing && count < size)
+ folio_zero_range(folio, 0, size);
+ }
while (count) {
- if (cs->write && cs->pipebufs && page) {
+ if (cs->write && cs->pipebufs && folio) {
/*
* Can't control lifetime of pipe buffers, so always
* copy user pages.
@@ -1112,12 +1117,12 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
if (err)
return err;
} else {
- return fuse_ref_page(cs, page, offset, count);
+ return fuse_ref_folio(cs, folio, offset, count);
}
} else if (!cs->len) {
- if (cs->move_pages && page &&
- offset == 0 && count == PAGE_SIZE) {
- err = fuse_try_move_page(cs, pagep);
+ if (cs->move_folios && folio &&
+ offset == 0 && count == size) {
+ err = fuse_try_move_folio(cs, foliop);
if (err <= 0)
return err;
} else {
@@ -1126,22 +1131,30 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
return err;
}
}
- if (page) {
- void *mapaddr = kmap_local_page(page);
- void *buf = mapaddr + offset;
- offset += fuse_copy_do(cs, &buf, &count);
+ if (folio) {
+ void *mapaddr = kmap_local_folio(folio, offset);
+ void *buf = mapaddr;
+ unsigned int copy = count;
+ unsigned int bytes_copied;
+
+ if (folio_test_highmem(folio) && count > PAGE_SIZE - offset_in_page(offset))
+ copy = PAGE_SIZE - offset_in_page(offset);
+
+ bytes_copied = fuse_copy_do(cs, &buf, &copy);
kunmap_local(mapaddr);
+ offset += bytes_copied;
+ count -= bytes_copied;
} else
offset += fuse_copy_do(cs, NULL, &count);
}
- if (page && !cs->write)
- flush_dcache_page(page);
+ if (folio && !cs->write)
+ flush_dcache_folio(folio);
return 0;
}
-/* Copy pages in the request to/from userspace buffer */
-static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
- int zeroing)
+/* Copy folios in the request to/from userspace buffer */
+static int fuse_copy_folios(struct fuse_copy_state *cs, unsigned nbytes,
+ int zeroing)
{
unsigned i;
struct fuse_req *req = cs->req;
@@ -1151,23 +1164,12 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
int err;
unsigned int offset = ap->descs[i].offset;
unsigned int count = min(nbytes, ap->descs[i].length);
- struct page *orig, *pagep;
- orig = pagep = &ap->folios[i]->page;
-
- err = fuse_copy_page(cs, &pagep, offset, count, zeroing);
+ err = fuse_copy_folio(cs, &ap->folios[i], offset, count, zeroing);
if (err)
return err;
nbytes -= count;
-
- /*
- * fuse_copy_page may have moved a page from a pipe instead of
- * copying into our given page, so update the folios if it was
- * replaced.
- */
- if (pagep != orig)
- ap->folios[i] = page_folio(pagep);
}
return 0;
}
@@ -1197,7 +1199,7 @@ int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
for (i = 0; !err && i < numargs; i++) {
struct fuse_arg *arg = &args[i];
if (i == numargs - 1 && argpages)
- err = fuse_copy_pages(cs, arg->size, zeroing);
+ err = fuse_copy_folios(cs, arg->size, zeroing);
else
err = fuse_copy_one(cs, arg->value, arg->size);
}
@@ -1538,7 +1540,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
if (!user_backed_iter(to))
return -EINVAL;
- fuse_copy_init(&cs, 1, to);
+ fuse_copy_init(&cs, true, to);
return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
}
@@ -1561,7 +1563,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!bufs)
return -ENOMEM;
- fuse_copy_init(&cs, 1, NULL);
+ fuse_copy_init(&cs, true, NULL);
cs.pipebufs = bufs;
cs.pipe = pipe;
ret = fuse_dev_do_read(fud, in, &cs, len);
@@ -1786,20 +1788,23 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
num = outarg.size;
while (num) {
struct folio *folio;
- struct page *page;
- unsigned int this_num;
+ unsigned int folio_offset;
+ unsigned int nr_bytes;
+ unsigned int nr_pages;
folio = filemap_grab_folio(mapping, index);
err = PTR_ERR(folio);
if (IS_ERR(folio))
goto out_iput;
- page = &folio->page;
- this_num = min_t(unsigned, num, folio_size(folio) - offset);
- err = fuse_copy_page(cs, &page, offset, this_num, 0);
+ folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
+ nr_bytes = min_t(unsigned, num, folio_size(folio) - folio_offset);
+ nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0);
if (!folio_test_uptodate(folio) && !err && offset == 0 &&
- (this_num == folio_size(folio) || file_size == end)) {
- folio_zero_segment(folio, this_num, folio_size(folio));
+ (nr_bytes == folio_size(folio) || file_size == end)) {
+ folio_zero_segment(folio, nr_bytes, folio_size(folio));
folio_mark_uptodate(folio);
}
folio_unlock(folio);
@@ -1808,9 +1813,9 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
if (err)
goto out_iput;
- num -= this_num;
+ num -= nr_bytes;
offset = 0;
- index++;
+ index += nr_pages;
}
err = 0;
@@ -1849,7 +1854,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
- unsigned int num_pages, cur_pages = 0;
+ unsigned int num_pages;
struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
@@ -1867,6 +1872,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, fc->max_pages);
+ num = min(num, num_pages << PAGE_SHIFT);
args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
@@ -1887,25 +1893,29 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
index = outarg->offset >> PAGE_SHIFT;
- while (num && cur_pages < num_pages) {
+ while (num) {
struct folio *folio;
- unsigned int this_num;
+ unsigned int folio_offset;
+ unsigned int nr_bytes;
+ unsigned int nr_pages;
folio = filemap_get_folio(mapping, index);
if (IS_ERR(folio))
break;
- this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+ folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
+ nr_bytes = min(folio_size(folio) - folio_offset, num);
+ nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
ap->folios[ap->num_folios] = folio;
- ap->descs[ap->num_folios].offset = offset;
- ap->descs[ap->num_folios].length = this_num;
+ ap->descs[ap->num_folios].offset = folio_offset;
+ ap->descs[ap->num_folios].length = nr_bytes;
ap->num_folios++;
- cur_pages++;
offset = 0;
- num -= this_num;
- total_len += this_num;
- index++;
+ num -= nr_bytes;
+ total_len += nr_bytes;
+ index += nr_pages;
}
ra->inarg.offset = outarg->offset;
ra->inarg.size = total_len;
@@ -2021,11 +2031,24 @@ static int fuse_notify_resend(struct fuse_conn *fc)
return 0;
}
+/*
+ * Increments the fuse connection epoch. This will result of dentries from
+ * previous epochs to be invalidated.
+ *
+ * XXX optimization: add call to shrink_dcache_sb()?
+ */
+static int fuse_notify_inc_epoch(struct fuse_conn *fc)
+{
+ atomic_inc(&fc->epoch);
+
+ return 0;
+}
+
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
- /* Don't try to move pages (yet) */
- cs->move_pages = 0;
+ /* Don't try to move folios (yet) */
+ cs->move_folios = false;
switch (code) {
case FUSE_NOTIFY_POLL:
@@ -2049,6 +2072,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_RESEND:
return fuse_notify_resend(fc);
+ case FUSE_NOTIFY_INC_EPOCH:
+ return fuse_notify_inc_epoch(fc);
+
default:
fuse_copy_finish(cs);
return -EINVAL;
@@ -2173,7 +2199,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
spin_unlock(&fpq->lock);
cs->req = req;
if (!req->args->page_replace)
- cs->move_pages = 0;
+ cs->move_folios = false;
if (oh.error)
err = nbytes != sizeof(oh) ? -EINVAL : 0;
@@ -2211,7 +2237,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
if (!user_backed_iter(from))
return -EINVAL;
- fuse_copy_init(&cs, 0, from);
+ fuse_copy_init(&cs, false, from);
return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
}
@@ -2285,13 +2311,13 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
}
pipe_unlock(pipe);
- fuse_copy_init(&cs, 0, NULL);
+ fuse_copy_init(&cs, false, NULL);
cs.pipebufs = bufs;
cs.nr_segs = nbuf;
cs.pipe = pipe;
if (flags & SPLICE_F_MOVE)
- cs.move_pages = 1;
+ cs.move_folios = true;
ret = fuse_dev_do_write(fud, &cs, len);
@@ -2602,6 +2628,17 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
}
}
+#ifdef CONFIG_PROC_FS
+static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file)
+{
+ struct fuse_dev *fud = fuse_get_dev(file);
+ if (!fud)
+ return;
+
+ seq_printf(seq, "fuse_connection:\t%u\n", fud->fc->dev);
+}
+#endif
+
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.open = fuse_dev_open,
@@ -2617,6 +2654,9 @@ const struct file_operations fuse_dev_operations = {
#ifdef CONFIG_FUSE_IO_URING
.uring_cmd = fuse_uring_cmd,
#endif
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = fuse_dev_show_fdinfo,
+#endif
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index accdce2977c5..249b210becb1 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -140,6 +140,21 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring)
}
}
+static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *list)
+{
+ struct fuse_ring_ent *ent;
+ struct fuse_req *req;
+
+ ent = list_first_entry_or_null(list, struct fuse_ring_ent, list);
+ if (!ent)
+ return false;
+
+ req = ent->fuse_req;
+
+ return time_is_before_jiffies(req->create_time +
+ fc->timeout.req_timeout);
+}
+
bool fuse_uring_request_expired(struct fuse_conn *fc)
{
struct fuse_ring *ring = fc->ring;
@@ -157,7 +172,8 @@ bool fuse_uring_request_expired(struct fuse_conn *fc)
spin_lock(&queue->lock);
if (fuse_request_expired(fc, &queue->fuse_req_queue) ||
fuse_request_expired(fc, &queue->fuse_req_bg_queue) ||
- fuse_fpq_processing_expired(fc, queue->fpq.processing)) {
+ ent_list_request_expired(fc, &queue->ent_w_req_queue) ||
+ ent_list_request_expired(fc, &queue->ent_in_userspace)) {
spin_unlock(&queue->lock);
return true;
}
@@ -494,7 +510,7 @@ static void fuse_uring_cancel(struct io_uring_cmd *cmd,
spin_lock(&queue->lock);
if (ent->state == FRRS_AVAILABLE) {
ent->state = FRRS_USERSPACE;
- list_move(&ent->list, &queue->ent_in_userspace);
+ list_move_tail(&ent->list, &queue->ent_in_userspace);
need_cmd_done = true;
ent->cmd = NULL;
}
@@ -577,8 +593,8 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
if (err)
return err;
- fuse_copy_init(&cs, 0, &iter);
- cs.is_uring = 1;
+ fuse_copy_init(&cs, false, &iter);
+ cs.is_uring = true;
cs.req = req;
return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
@@ -607,8 +623,8 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
return err;
}
- fuse_copy_init(&cs, 1, &iter);
- cs.is_uring = 1;
+ fuse_copy_init(&cs, true, &iter);
+ cs.is_uring = true;
cs.req = req;
if (num_args > 0) {
@@ -714,7 +730,7 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
cmd = ent->cmd;
ent->cmd = NULL;
ent->state = FRRS_USERSPACE;
- list_move(&ent->list, &queue->ent_in_userspace);
+ list_move_tail(&ent->list, &queue->ent_in_userspace);
spin_unlock(&queue->lock);
io_uring_cmd_done(cmd, 0, 0, issue_flags);
@@ -764,7 +780,7 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
clear_bit(FR_PENDING, &req->flags);
ent->fuse_req = req;
ent->state = FRRS_FUSE_REQ;
- list_move(&ent->list, &queue->ent_w_req_queue);
+ list_move_tail(&ent->list, &queue->ent_w_req_queue);
fuse_uring_add_to_pq(ent, req);
}
@@ -1180,7 +1196,7 @@ static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
spin_lock(&queue->lock);
ent->state = FRRS_USERSPACE;
- list_move(&ent->list, &queue->ent_in_userspace);
+ list_move_tail(&ent->list, &queue->ent_in_userspace);
ent->cmd = NULL;
spin_unlock(&queue->lock);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 33b82529cb6e..45b4c3cc1396 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -200,9 +200,14 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
{
struct inode *inode;
struct fuse_mount *fm;
+ struct fuse_conn *fc;
struct fuse_inode *fi;
int ret;
+ fc = get_fuse_conn_super(dir->i_sb);
+ if (entry->d_time < atomic_read(&fc->epoch))
+ goto invalid;
+
inode = d_inode_rcu(entry);
if (inode && fuse_is_bad(inode))
goto invalid;
@@ -319,9 +324,6 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
/* Create the submount */
mnt = fc_mount(fsc);
- if (!IS_ERR(mnt))
- mntget(mnt);
-
put_fs_context(fsc);
return mnt;
}
@@ -415,16 +417,20 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
unsigned int flags)
{
- int err;
struct fuse_entry_out outarg;
+ struct fuse_conn *fc;
struct inode *inode;
struct dentry *newent;
+ int err, epoch;
bool outarg_valid = true;
bool locked;
if (fuse_is_bad(dir))
return ERR_PTR(-EIO);
+ fc = get_fuse_conn_super(dir->i_sb);
+ epoch = atomic_read(&fc->epoch);
+
locked = fuse_lock_inode(dir);
err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
&outarg, &inode);
@@ -446,6 +452,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
goto out_err;
entry = newent ? newent : entry;
+ entry->d_time = epoch;
if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
@@ -619,7 +626,6 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *entry, struct file *file,
unsigned int flags, umode_t mode, u32 opcode)
{
- int err;
struct inode *inode;
struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
@@ -629,11 +635,13 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
struct fuse_entry_out outentry;
struct fuse_inode *fi;
struct fuse_file *ff;
+ int epoch, err;
bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
+ epoch = atomic_read(&fm->fc->epoch);
forget = fuse_alloc_forget();
err = -ENOMEM;
if (!forget)
@@ -702,6 +710,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
}
kfree(forget);
d_instantiate(entry, inode);
+ entry->d_time = epoch;
fuse_change_entry_timeout(entry, &outentry);
fuse_dir_changed(dir);
err = generic_file_open(inode, file);
@@ -788,12 +797,14 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun
struct fuse_entry_out outarg;
struct inode *inode;
struct dentry *d;
- int err;
struct fuse_forget_link *forget;
+ int epoch, err;
if (fuse_is_bad(dir))
return ERR_PTR(-EIO);
+ epoch = atomic_read(&fm->fc->epoch);
+
forget = fuse_alloc_forget();
if (!forget)
return ERR_PTR(-ENOMEM);
@@ -835,10 +846,13 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun
if (IS_ERR(d))
return d;
- if (d)
+ if (d) {
+ d->d_time = epoch;
fuse_change_entry_timeout(d, &outarg);
- else
+ } else {
+ entry->d_time = epoch;
fuse_change_entry_timeout(entry, &outarg);
+ }
fuse_dir_changed(dir);
return d;
@@ -1612,10 +1626,10 @@ static int fuse_permission(struct mnt_idmap *idmap,
return err;
}
-static int fuse_readlink_page(struct inode *inode, struct folio *folio)
+static int fuse_readlink_folio(struct inode *inode, struct folio *folio)
{
struct fuse_mount *fm = get_fuse_mount(inode);
- struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
+ struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 };
struct fuse_args_pages ap = {
.num_folios = 1,
.folios = &folio,
@@ -1670,7 +1684,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
if (!folio)
goto out_err;
- err = fuse_readlink_page(inode, folio);
+ err = fuse_readlink_folio(inode, folio);
if (err) {
folio_put(folio);
goto out_err;
@@ -1946,6 +1960,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
int err;
bool trust_local_cmtime = is_wb;
bool fault_blocked = false;
+ u64 attr_version;
if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
@@ -2030,6 +2045,8 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
inarg.valid |= FATTR_KILL_SUIDGID;
}
+
+ attr_version = fuse_get_attr_version(fm->fc);
fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
err = fuse_simple_request(fm, &args);
if (err) {
@@ -2055,6 +2072,14 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
/* FIXME: clear I_DIRTY_SYNC? */
}
+ if (fi->attr_version > attr_version) {
+ /*
+ * Apply attributes, for example for fsnotify_change(), but set
+ * attribute timeout to zero.
+ */
+ outarg.attr_valid = outarg.attr_valid_nsec = 0;
+ }
+
fuse_change_attributes_common(inode, &outarg.attr, NULL,
ATTR_TIMEOUT(&outarg),
fuse_get_cache_mask(inode), 0);
@@ -2260,7 +2285,7 @@ void fuse_init_dir(struct inode *inode)
static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
{
- int err = fuse_readlink_page(folio->mapping->host, folio);
+ int err = fuse_readlink_folio(folio->mapping->host, folio);
if (!err)
folio_mark_uptodate(folio);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 754378dd9f71..f102afc03359 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -415,89 +415,11 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
struct fuse_writepage_args {
struct fuse_io_args ia;
- struct rb_node writepages_entry;
struct list_head queue_entry;
- struct fuse_writepage_args *next;
struct inode *inode;
struct fuse_sync_bucket *bucket;
};
-static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
- pgoff_t idx_from, pgoff_t idx_to)
-{
- struct rb_node *n;
-
- n = fi->writepages.rb_node;
-
- while (n) {
- struct fuse_writepage_args *wpa;
- pgoff_t curr_index;
-
- wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
- WARN_ON(get_fuse_inode(wpa->inode) != fi);
- curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
- if (idx_from >= curr_index + wpa->ia.ap.num_folios)
- n = n->rb_right;
- else if (idx_to < curr_index)
- n = n->rb_left;
- else
- return wpa;
- }
- return NULL;
-}
-
-/*
- * Check if any page in a range is under writeback
- */
-static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
- pgoff_t idx_to)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
- bool found;
-
- if (RB_EMPTY_ROOT(&fi->writepages))
- return false;
-
- spin_lock(&fi->lock);
- found = fuse_find_writeback(fi, idx_from, idx_to);
- spin_unlock(&fi->lock);
-
- return found;
-}
-
-static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
-{
- return fuse_range_is_writeback(inode, index, index);
-}
-
-/*
- * Wait for page writeback to be completed.
- *
- * Since fuse doesn't rely on the VM writeback tracking, this has to
- * use some other means.
- */
-static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
-}
-
-static inline bool fuse_folio_is_writeback(struct inode *inode,
- struct folio *folio)
-{
- pgoff_t last = folio_next_index(folio) - 1;
- return fuse_range_is_writeback(inode, folio_index(folio), last);
-}
-
-static void fuse_wait_on_folio_writeback(struct inode *inode,
- struct folio *folio)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- wait_event(fi->page_waitq, !fuse_folio_is_writeback(inode, folio));
-}
-
/*
* Wait for all pending writepages on the inode to finish.
*
@@ -532,10 +454,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (err)
return err;
- inode_lock(inode);
- fuse_sync_writes(inode);
- inode_unlock(inode);
-
err = filemap_check_errors(file->f_mapping);
if (err)
return err;
@@ -875,7 +793,7 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio)
struct inode *inode = folio->mapping->host;
struct fuse_mount *fm = get_fuse_mount(inode);
loff_t pos = folio_pos(folio);
- struct fuse_folio_desc desc = { .length = PAGE_SIZE };
+ struct fuse_folio_desc desc = { .length = folio_size(folio) };
struct fuse_io_args ia = {
.ap.args.page_zeroing = true,
.ap.args.out_pages = true,
@@ -886,13 +804,6 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio)
ssize_t res;
u64 attr_ver;
- /*
- * With the temporary pages that are used to complete writeback, we can
- * have writeback that extends beyond the lifetime of the folio. So
- * make sure we read a properly synced folio.
- */
- fuse_wait_on_folio_writeback(inode, folio);
-
attr_ver = fuse_get_attr_version(fm->fc);
/* Don't overflow end offset */
@@ -965,14 +876,13 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
fuse_io_free(ia);
}
-static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
+ unsigned int count)
{
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
struct fuse_args_pages *ap = &ia->ap;
loff_t pos = folio_pos(ap->folios[0]);
- /* Currently, all folios in FUSE are one page */
- size_t count = ap->num_folios << PAGE_SHIFT;
ssize_t res;
int err;
@@ -1005,17 +915,13 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
static void fuse_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->mapping->host;
- struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
unsigned int max_pages, nr_pages;
- pgoff_t first = readahead_index(rac);
- pgoff_t last = first + readahead_count(rac) - 1;
+ struct folio *folio = NULL;
if (fuse_is_bad(inode))
return;
- wait_event(fi->page_waitq, !fuse_range_is_writeback(inode, first, last));
-
max_pages = min_t(unsigned int, fc->max_pages,
fc->max_read / PAGE_SIZE);
@@ -1033,8 +939,8 @@ static void fuse_readahead(struct readahead_control *rac)
while (nr_pages) {
struct fuse_io_args *ia;
struct fuse_args_pages *ap;
- struct folio *folio;
unsigned cur_pages = min(max_pages, nr_pages);
+ unsigned int pages = 0;
if (fc->num_background >= fc->congestion_threshold &&
rac->ra->async_size >= readahead_count(rac))
@@ -1046,10 +952,12 @@ static void fuse_readahead(struct readahead_control *rac)
ia = fuse_io_alloc(NULL, cur_pages);
if (!ia)
- return;
+ break;
ap = &ia->ap;
- while (ap->num_folios < cur_pages) {
+ while (pages < cur_pages) {
+ unsigned int folio_pages;
+
/*
* This returns a folio with a ref held on it.
* The ref needs to be held until the request is
@@ -1057,13 +965,31 @@ static void fuse_readahead(struct readahead_control *rac)
* fuse_try_move_page()) drops the ref after it's
* replaced in the page cache.
*/
- folio = __readahead_folio(rac);
+ if (!folio)
+ folio = __readahead_folio(rac);
+
+ folio_pages = folio_nr_pages(folio);
+ if (folio_pages > cur_pages - pages) {
+ /*
+ * Large folios belonging to fuse will never
+ * have more pages than max_pages.
+ */
+ WARN_ON(!pages);
+ break;
+ }
+
ap->folios[ap->num_folios] = folio;
ap->descs[ap->num_folios].length = folio_size(folio);
ap->num_folios++;
+ pages += folio_pages;
+ folio = NULL;
}
- fuse_send_readpages(ia, rac->file);
- nr_pages -= cur_pages;
+ fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT);
+ nr_pages -= pages;
+ }
+ if (folio) {
+ folio_end_read(folio, false);
+ folio_put(folio);
}
}
@@ -1181,7 +1107,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
int err;
for (i = 0; i < ap->num_folios; i++)
- fuse_wait_on_folio_writeback(inode, ap->folios[i]);
+ folio_wait_writeback(ap->folios[i]);
fuse_write_args_fill(ia, ff, pos, count);
ia->write.in.flags = fuse_write_flags(iocb);
@@ -1226,27 +1152,24 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
- unsigned int nr_pages = 0;
size_t count = 0;
- int err;
+ unsigned int num;
+ int err = 0;
+
+ num = min(iov_iter_count(ii), fc->max_write);
+ num = min(num, max_pages << PAGE_SHIFT);
ap->args.in_pages = true;
ap->descs[0].offset = offset;
- do {
+ while (num) {
size_t tmp;
struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;
- size_t bytes = min_t(size_t, PAGE_SIZE - offset,
- iov_iter_count(ii));
-
- bytes = min_t(size_t, bytes, fc->max_write - count);
+ unsigned int bytes;
+ unsigned int folio_offset;
again:
- err = -EFAULT;
- if (fault_in_iov_iter_readable(ii, bytes))
- break;
-
folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
if (IS_ERR(folio)) {
@@ -1257,29 +1180,42 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
if (mapping_writably_mapped(mapping))
flush_dcache_folio(folio);
- tmp = copy_folio_from_iter_atomic(folio, offset, bytes, ii);
+ folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
+ bytes = min(folio_size(folio) - folio_offset, num);
+
+ tmp = copy_folio_from_iter_atomic(folio, folio_offset, bytes, ii);
flush_dcache_folio(folio);
if (!tmp) {
folio_unlock(folio);
folio_put(folio);
+
+ /*
+ * Ensure forward progress by faulting in
+ * while not holding the folio lock:
+ */
+ if (fault_in_iov_iter_readable(ii, bytes)) {
+ err = -EFAULT;
+ break;
+ }
+
goto again;
}
- err = 0;
ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].offset = folio_offset;
ap->descs[ap->num_folios].length = tmp;
ap->num_folios++;
- nr_pages++;
count += tmp;
pos += tmp;
+ num -= tmp;
offset += tmp;
- if (offset == PAGE_SIZE)
+ if (offset == folio_size(folio))
offset = 0;
- /* If we copied full page, mark it uptodate */
- if (tmp == PAGE_SIZE)
+ /* If we copied full folio, mark it uptodate */
+ if (tmp == folio_size(folio))
folio_mark_uptodate(folio);
if (folio_test_uptodate(folio)) {
@@ -1288,10 +1224,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
ia->write.folio_locked = true;
break;
}
- if (!fc->big_writes)
+ if (!fc->big_writes || offset != 0)
break;
- } while (iov_iter_count(ii) && count < fc->max_write &&
- nr_pages < max_pages && offset == 0);
+ }
return count > 0 ? count : err;
}
@@ -1638,7 +1573,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
return res;
}
}
- if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
+ if (!cuse && filemap_range_has_writeback(mapping, pos, (pos + count - 1))) {
if (!write)
inode_lock(inode);
fuse_sync_writes(inode);
@@ -1835,38 +1770,34 @@ static ssize_t fuse_splice_write(struct pipe_inode_info *pipe, struct file *out,
static void fuse_writepage_free(struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
- int i;
if (wpa->bucket)
fuse_sync_bucket_dec(wpa->bucket);
- for (i = 0; i < ap->num_folios; i++)
- folio_put(ap->folios[i]);
-
fuse_file_put(wpa->ia.ff, false);
kfree(ap->folios);
kfree(wpa);
}
-static void fuse_writepage_finish_stat(struct inode *inode, struct folio *folio)
-{
- struct backing_dev_info *bdi = inode_to_bdi(inode);
-
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- node_stat_sub_folio(folio, NR_WRITEBACK_TEMP);
- wb_writeout_inc(&bdi->wb);
-}
-
static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
- for (i = 0; i < ap->num_folios; i++)
- fuse_writepage_finish_stat(inode, ap->folios[i]);
+ for (i = 0; i < ap->num_folios; i++) {
+ /*
+ * Benchmarks showed that ending writeback within the
+ * scope of the fi->lock alleviates xarray lock
+ * contention and noticeably improves performance.
+ */
+ folio_end_writeback(ap->folios[i]);
+ dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+ wb_writeout_inc(&bdi->wb);
+ }
wake_up(&fi->page_waitq);
}
@@ -1877,13 +1808,15 @@ static void fuse_send_writepage(struct fuse_mount *fm,
__releases(fi->lock)
__acquires(fi->lock)
{
- struct fuse_writepage_args *aux, *next;
struct fuse_inode *fi = get_fuse_inode(wpa->inode);
+ struct fuse_args_pages *ap = &wpa->ia.ap;
struct fuse_write_in *inarg = &wpa->ia.write.in;
- struct fuse_args *args = &wpa->ia.ap.args;
- /* Currently, all folios in FUSE are one page */
- __u64 data_size = wpa->ia.ap.num_folios * PAGE_SIZE;
- int err;
+ struct fuse_args *args = &ap->args;
+ __u64 data_size = 0;
+ int err, i;
+
+ for (i = 0; i < ap->num_folios; i++)
+ data_size += ap->descs[i].length;
fi->writectr++;
if (inarg->offset + data_size <= size) {
@@ -1914,19 +1847,8 @@ __acquires(fi->lock)
out_free:
fi->writectr--;
- rb_erase(&wpa->writepages_entry, &fi->writepages);
fuse_writepage_finish(wpa);
spin_unlock(&fi->lock);
-
- /* After rb_erase() aux request list is private */
- for (aux = wpa->next; aux; aux = next) {
- next = aux->next;
- aux->next = NULL;
- fuse_writepage_finish_stat(aux->inode,
- aux->ia.ap.folios[0]);
- fuse_writepage_free(aux);
- }
-
fuse_writepage_free(wpa);
spin_lock(&fi->lock);
}
@@ -1954,43 +1876,6 @@ __acquires(fi->lock)
}
}
-static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
- struct fuse_writepage_args *wpa)
-{
- pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
- pgoff_t idx_to = idx_from + wpa->ia.ap.num_folios - 1;
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
-
- WARN_ON(!wpa->ia.ap.num_folios);
- while (*p) {
- struct fuse_writepage_args *curr;
- pgoff_t curr_index;
-
- parent = *p;
- curr = rb_entry(parent, struct fuse_writepage_args,
- writepages_entry);
- WARN_ON(curr->inode != wpa->inode);
- curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
-
- if (idx_from >= curr_index + curr->ia.ap.num_folios)
- p = &(*p)->rb_right;
- else if (idx_to < curr_index)
- p = &(*p)->rb_left;
- else
- return curr;
- }
-
- rb_link_node(&wpa->writepages_entry, parent, p);
- rb_insert_color(&wpa->writepages_entry, root);
- return NULL;
-}
-
-static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
-{
- WARN_ON(fuse_insert_writeback(root, wpa));
-}
-
static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
int error)
{
@@ -2010,41 +1895,6 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
if (!fc->writeback_cache)
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODIFY);
spin_lock(&fi->lock);
- rb_erase(&wpa->writepages_entry, &fi->writepages);
- while (wpa->next) {
- struct fuse_mount *fm = get_fuse_mount(inode);
- struct fuse_write_in *inarg = &wpa->ia.write.in;
- struct fuse_writepage_args *next = wpa->next;
-
- wpa->next = next->next;
- next->next = NULL;
- tree_insert(&fi->writepages, next);
-
- /*
- * Skip fuse_flush_writepages() to make it easy to crop requests
- * based on primary request size.
- *
- * 1st case (trivial): there are no concurrent activities using
- * fuse_set/release_nowrite. Then we're on safe side because
- * fuse_flush_writepages() would call fuse_send_writepage()
- * anyway.
- *
- * 2nd case: someone called fuse_set_nowrite and it is waiting
- * now for completion of all in-flight requests. This happens
- * rarely and no more than once per page, so this should be
- * okay.
- *
- * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
- * of fuse_set_nowrite..fuse_release_nowrite section. The fact
- * that fuse_set_nowrite returned implies that all in-flight
- * requests were completed along with all of their secondary
- * requests. Further primary requests are blocked by negative
- * writectr. Hence there cannot be any in-flight requests and
- * no invocations of fuse_writepage_end() while we're in
- * fuse_set_nowrite..fuse_release_nowrite section.
- */
- fuse_send_writepage(fm, next, inarg->offset + inarg->size);
- }
fi->writectr--;
fuse_writepage_finish(wpa);
spin_unlock(&fi->lock);
@@ -2131,19 +1981,16 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
}
static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
- struct folio *tmp_folio, uint32_t folio_index)
+ uint32_t folio_index)
{
struct inode *inode = folio->mapping->host;
struct fuse_args_pages *ap = &wpa->ia.ap;
- folio_copy(tmp_folio, folio);
-
- ap->folios[folio_index] = tmp_folio;
+ ap->folios[folio_index] = folio;
ap->descs[folio_index].offset = 0;
- ap->descs[folio_index].length = PAGE_SIZE;
+ ap->descs[folio_index].length = folio_size(folio);
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
}
static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
@@ -2178,18 +2025,12 @@ static int fuse_writepage_locked(struct folio *folio)
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_writepage_args *wpa;
struct fuse_args_pages *ap;
- struct folio *tmp_folio;
struct fuse_file *ff;
- int error = -ENOMEM;
+ int error = -EIO;
- tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
- if (!tmp_folio)
- goto err;
-
- error = -EIO;
ff = fuse_write_file_get(fi);
if (!ff)
- goto err_nofile;
+ goto err;
wpa = fuse_writepage_args_setup(folio, ff);
error = -ENOMEM;
@@ -2200,22 +2041,17 @@ static int fuse_writepage_locked(struct folio *folio)
ap->num_folios = 1;
folio_start_writeback(folio);
- fuse_writepage_args_page_fill(wpa, folio, tmp_folio, 0);
+ fuse_writepage_args_page_fill(wpa, folio, 0);
spin_lock(&fi->lock);
- tree_insert(&fi->writepages, wpa);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
- folio_end_writeback(folio);
-
return 0;
err_writepage_args:
fuse_file_put(ff, false);
-err_nofile:
- folio_put(tmp_folio);
err:
mapping_set_error(folio->mapping, error);
return error;
@@ -2225,8 +2061,8 @@ struct fuse_fill_wb_data {
struct fuse_writepage_args *wpa;
struct fuse_file *ff;
struct inode *inode;
- struct folio **orig_folios;
unsigned int max_folios;
+ unsigned int nr_pages;
};
static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
@@ -2260,69 +2096,11 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
struct fuse_writepage_args *wpa = data->wpa;
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
- int num_folios = wpa->ia.ap.num_folios;
- int i;
spin_lock(&fi->lock);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
-
- for (i = 0; i < num_folios; i++)
- folio_end_writeback(data->orig_folios[i]);
-}
-
-/*
- * Check under fi->lock if the page is under writeback, and insert it onto the
- * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's
- * one already added for a page at this offset. If there's none, then insert
- * this new request onto the auxiliary list, otherwise reuse the existing one by
- * swapping the new temp page with the old one.
- */
-static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
- struct folio *folio)
-{
- struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
- struct fuse_writepage_args *tmp;
- struct fuse_writepage_args *old_wpa;
- struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
-
- WARN_ON(new_ap->num_folios != 0);
- new_ap->num_folios = 1;
-
- spin_lock(&fi->lock);
- old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
- if (!old_wpa) {
- spin_unlock(&fi->lock);
- return true;
- }
-
- for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
- pgoff_t curr_index;
-
- WARN_ON(tmp->inode != new_wpa->inode);
- curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
- if (curr_index == folio->index) {
- WARN_ON(tmp->ia.ap.num_folios != 1);
- swap(tmp->ia.ap.folios[0], new_ap->folios[0]);
- break;
- }
- }
-
- if (!tmp) {
- new_wpa->next = old_wpa->next;
- old_wpa->next = new_wpa;
- }
-
- spin_unlock(&fi->lock);
-
- if (tmp) {
- fuse_writepage_finish_stat(new_wpa->inode,
- folio);
- fuse_writepage_free(new_wpa);
- }
-
- return false;
}
static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
@@ -2331,25 +2109,16 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
{
WARN_ON(!ap->num_folios);
- /*
- * Being under writeback is unlikely but possible. For example direct
- * read to an mmaped fuse file will set the page dirty twice; once when
- * the pages are faulted with get_user_pages(), and then after the read
- * completed.
- */
- if (fuse_folio_is_writeback(data->inode, folio))
- return true;
-
/* Reached max pages */
- if (ap->num_folios == fc->max_pages)
+ if (data->nr_pages + folio_nr_pages(folio) > fc->max_pages)
return true;
/* Reached max write bytes */
- if ((ap->num_folios + 1) * PAGE_SIZE > fc->max_write)
+ if ((data->nr_pages * PAGE_SIZE) + folio_size(folio) > fc->max_write)
return true;
/* Discontinuity */
- if (data->orig_folios[ap->num_folios - 1]->index + 1 != folio_index(folio))
+ if (folio_next_index(ap->folios[ap->num_folios - 1]) != folio->index)
return true;
/* Need to grow the pages array? If so, did the expansion fail? */
@@ -2368,7 +2137,6 @@ static int fuse_writepages_fill(struct folio *folio,
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
- struct folio *tmp_folio;
int err;
if (!data->ff) {
@@ -2381,56 +2149,27 @@ static int fuse_writepages_fill(struct folio *folio,
if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) {
fuse_writepages_send(data);
data->wpa = NULL;
+ data->nr_pages = 0;
}
- err = -ENOMEM;
- tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
- if (!tmp_folio)
- goto out_unlock;
-
- /*
- * The page must not be redirtied until the writeout is completed
- * (i.e. userspace has sent a reply to the write request). Otherwise
- * there could be more than one temporary page instance for each real
- * page.
- *
- * This is ensured by holding the page lock in page_mkwrite() while
- * checking fuse_page_is_writeback(). We already hold the page lock
- * since clear_page_dirty_for_io() and keep it held until we add the
- * request to the fi->writepages list and increment ap->num_folios.
- * After this fuse_page_is_writeback() will indicate that the page is
- * under writeback, so we can release the page lock.
- */
if (data->wpa == NULL) {
err = -ENOMEM;
wpa = fuse_writepage_args_setup(folio, data->ff);
- if (!wpa) {
- folio_put(tmp_folio);
+ if (!wpa)
goto out_unlock;
- }
fuse_file_get(wpa->ia.ff);
data->max_folios = 1;
ap = &wpa->ia.ap;
}
folio_start_writeback(folio);
- fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_folios);
- data->orig_folios[ap->num_folios] = folio;
+ fuse_writepage_args_page_fill(wpa, folio, ap->num_folios);
+ data->nr_pages += folio_nr_pages(folio);
err = 0;
- if (data->wpa) {
- /*
- * Protected by fi->lock against concurrent access by
- * fuse_page_is_writeback().
- */
- spin_lock(&fi->lock);
- ap->num_folios++;
- spin_unlock(&fi->lock);
- } else if (fuse_writepage_add(wpa, folio)) {
+ ap->num_folios++;
+ if (!data->wpa)
data->wpa = wpa;
- } else {
- folio_end_writeback(folio);
- }
out_unlock:
folio_unlock(folio);
@@ -2456,13 +2195,7 @@ static int fuse_writepages(struct address_space *mapping,
data.inode = inode;
data.wpa = NULL;
data.ff = NULL;
-
- err = -ENOMEM;
- data.orig_folios = kcalloc(fc->max_pages,
- sizeof(struct folio *),
- GFP_NOFS);
- if (!data.orig_folios)
- goto out;
+ data.nr_pages = 0;
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
if (data.wpa) {
@@ -2472,7 +2205,6 @@ static int fuse_writepages(struct address_space *mapping,
if (data.ff)
fuse_file_put(data.ff, false);
- kfree(data.orig_folios);
out:
return err;
}
@@ -2497,8 +2229,6 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
if (IS_ERR(folio))
goto error;
- fuse_wait_on_page_writeback(mapping->host, folio->index);
-
if (folio_test_uptodate(folio) || len >= folio_size(folio))
goto success;
/*
@@ -2561,13 +2291,9 @@ static int fuse_launder_folio(struct folio *folio)
{
int err = 0;
if (folio_clear_dirty_for_io(folio)) {
- struct inode *inode = folio->mapping->host;
-
- /* Serialize with pending writeback for the same page */
- fuse_wait_on_page_writeback(inode, folio->index);
err = fuse_writepage_locked(folio);
if (!err)
- fuse_wait_on_page_writeback(inode, folio->index);
+ folio_wait_writeback(folio);
}
return err;
}
@@ -2611,7 +2337,7 @@ static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
- fuse_wait_on_folio_writeback(inode, folio);
+ folio_wait_writeback(folio);
return VM_FAULT_LOCKED;
}
@@ -3429,9 +3155,12 @@ static const struct address_space_operations fuse_file_aops = {
void fuse_init_file_inode(struct inode *inode, unsigned int flags)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops;
+ if (fc->writeback_cache)
+ mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
@@ -3439,7 +3168,6 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
fi->iocachectr = 0;
init_waitqueue_head(&fi->page_waitq);
init_waitqueue_head(&fi->direct_io_waitq);
- fi->writepages = RB_ROOT;
if (IS_ENABLED(CONFIG_FUSE_DAX))
fuse_dax_inode_init(inode, flags);
diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
index b3c2e32254ba..5a9bd771a319 100644
--- a/fs/fuse/fuse_dev_i.h
+++ b/fs/fuse/fuse_dev_i.h
@@ -20,7 +20,6 @@ struct fuse_iqueue;
struct fuse_forget_link;
struct fuse_copy_state {
- int write;
struct fuse_req *req;
struct iov_iter *iter;
struct pipe_buffer *pipebufs;
@@ -30,8 +29,9 @@ struct fuse_copy_state {
struct page *pg;
unsigned int len;
unsigned int offset;
- unsigned int move_pages:1;
- unsigned int is_uring:1;
+ bool write:1;
+ bool move_folios:1;
+ bool is_uring:1;
struct {
unsigned int copied_sz; /* copied size into the user buffer */
} ring;
@@ -51,7 +51,7 @@ struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique);
void fuse_dev_end_requests(struct list_head *head);
-void fuse_copy_init(struct fuse_copy_state *cs, int write,
+void fuse_copy_init(struct fuse_copy_state *cs, bool write,
struct iov_iter *iter);
int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs,
unsigned int argpages, struct fuse_arg *args,
@@ -64,7 +64,6 @@ void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req);
bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock);
bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list);
-bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing);
#endif
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d56d4fd956db..b54f4f57789f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -74,8 +74,8 @@ extern struct list_head fuse_conn_list;
extern struct mutex fuse_mutex;
/** Module parameters */
-extern unsigned max_user_bgreq;
-extern unsigned max_user_congthresh;
+extern unsigned int max_user_bgreq;
+extern unsigned int max_user_congthresh;
/* One forget request */
struct fuse_forget_link {
@@ -161,9 +161,6 @@ struct fuse_inode {
/* waitq for direct-io completion */
wait_queue_head_t direct_io_waitq;
-
- /* List of writepage requestst (pending or sent) */
- struct rb_root writepages;
};
/* readdir cache (directory only) */
@@ -636,6 +633,9 @@ struct fuse_conn {
/** Number of fuse_dev's */
atomic_t dev_count;
+ /** Current epoch for up-to-date dentries */
+ atomic_t epoch;
+
struct rcu_head rcu;
/** The user id for this mount */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd48e8d37f2e..bfe8d8af46f3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -41,7 +41,7 @@ unsigned int fuse_max_pages_limit = 256;
unsigned int fuse_default_req_timeout;
unsigned int fuse_max_req_timeout;
-unsigned max_user_bgreq;
+unsigned int max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
&max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
@@ -49,7 +49,7 @@ MODULE_PARM_DESC(max_user_bgreq,
"Global limit for the maximum number of backgrounded requests an "
"unprivileged user can set");
-unsigned max_user_congthresh;
+unsigned int max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
&max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
@@ -962,6 +962,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
init_rwsem(&fc->killsb);
refcount_set(&fc->count, 1);
atomic_set(&fc->dev_count, 1);
+ atomic_set(&fc->epoch, 1);
init_waitqueue_head(&fc->blocked_waitq);
fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
INIT_LIST_HEAD(&fc->bg_queue);
@@ -1036,7 +1037,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
}
EXPORT_SYMBOL_GPL(fuse_conn_get);
-static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
+static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mode)
{
struct fuse_attr attr;
memset(&attr, 0, sizeof(attr));
@@ -1211,7 +1212,7 @@ static const struct super_operations fuse_super_operations = {
.show_options = fuse_show_options,
};
-static void sanitize_global_limit(unsigned *limit)
+static void sanitize_global_limit(unsigned int *limit)
{
/*
* The default maximum number of async requests is calculated to consume
@@ -1232,7 +1233,7 @@ static int set_global_limit(const char *val, const struct kernel_param *kp)
if (rv)
return rv;
- sanitize_global_limit((unsigned *)kp->arg);
+ sanitize_global_limit((unsigned int *)kp->arg);
return 0;
}
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index edcd6f18a8a8..c2aae2eef086 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -161,6 +161,7 @@ static int fuse_direntplus_link(struct file *file,
struct fuse_conn *fc;
struct inode *inode;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ int epoch;
if (!o->nodeid) {
/*
@@ -190,6 +191,7 @@ static int fuse_direntplus_link(struct file *file,
return -EIO;
fc = get_fuse_conn(dir);
+ epoch = atomic_read(&fc->epoch);
name.hash = full_name_hash(parent, name.name, name.len);
dentry = d_lookup(parent, &name);
@@ -256,6 +258,7 @@ retry:
}
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
+ dentry->d_time = epoch;
fuse_change_entry_timeout(dentry, o);
dput(dentry);
@@ -332,35 +335,32 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
int plus;
ssize_t res;
- struct folio *folio;
struct inode *inode = file_inode(file);
struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_conn *fc = fm->fc;
struct fuse_io_args ia = {};
- struct fuse_args_pages *ap = &ia.ap;
- struct fuse_folio_desc desc = { .length = PAGE_SIZE };
+ struct fuse_args *args = &ia.ap.args;
+ void *buf;
+ size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT);
u64 attr_version = 0, evict_ctr = 0;
bool locked;
- folio = folio_alloc(GFP_KERNEL, 0);
- if (!folio)
+ buf = kvmalloc(bufsize, GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
+ args->out_args[0].value = buf;
+
plus = fuse_use_readdirplus(inode, ctx);
- ap->args.out_pages = true;
- ap->num_folios = 1;
- ap->folios = &folio;
- ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fm->fc);
evict_ctr = fuse_get_evict_ctr(fm->fc);
- fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
- FUSE_READDIRPLUS);
+ fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS);
} else {
- fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
- FUSE_READDIR);
+ fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
- res = fuse_simple_request(fm, &ap->args);
+ res = fuse_simple_request(fm, args);
fuse_unlock_inode(inode, locked);
if (res >= 0) {
if (!res) {
@@ -369,16 +369,14 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
- res = parse_dirplusfile(folio_address(folio), res,
- file, ctx, attr_version,
+ res = parse_dirplusfile(buf, res, file, ctx, attr_version,
evict_ctr);
} else {
- res = parse_dirfile(folio_address(folio), res, file,
- ctx);
+ res = parse_dirfile(buf, res, file, ctx);
}
}
- folio_put(folio);
+ kvfree(buf);
fuse_invalidate_atime(inode);
return res;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 653f0ff4b057..85c491fcf1a3 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -64,7 +64,10 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
void free_sbd(struct gfs2_sbd *sdp)
{
+ struct super_block *sb = sdp->sd_vfs;
+
free_percpu(sdp->sd_lkstats);
+ sb->s_fs_info = NULL;
kfree(sdp);
}
@@ -1314,7 +1317,6 @@ fail_iput:
iput(sdp->sd_inode);
fail_free:
free_sbd(sdp);
- sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748125653d6c..c3c8842920d2 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -764,7 +764,6 @@ fail_reg:
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
wait_for_completion(&sdp->sd_kobj_unregister);
- sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 233abf598f65..3729391a18f3 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1691,6 +1691,8 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
if (wpc->iomap.flags & IOMAP_F_SHARED)
ioend_flags |= IOMAP_IOEND_SHARED;
+ if (folio_test_dropbehind(folio))
+ ioend_flags |= IOMAP_IOEND_DONTCACHE;
if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
ioend_flags |= IOMAP_IOEND_BOUNDARY;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index df575a873ec6..9029cd216912 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -15,6 +15,7 @@
#include <linux/mempool.h>
#include <linux/seq_file.h>
#include <linux/writeback.h>
+#include <linux/migrate.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
@@ -151,7 +152,59 @@ static inline void dec_io(struct folio *folio, blk_status_t status,
handler(folio, anchor->status);
}
+#ifdef CONFIG_MIGRATION
+static int __metapage_migrate_folio(struct address_space *mapping,
+ struct folio *dst, struct folio *src,
+ enum migrate_mode mode)
+{
+ struct meta_anchor *src_anchor = src->private;
+ struct metapage *mps[MPS_PER_PAGE] = {0};
+ struct metapage *mp;
+ int i, rc;
+
+ for (i = 0; i < MPS_PER_PAGE; i++) {
+ mp = src_anchor->mp[i];
+ if (mp && metapage_locked(mp))
+ return -EAGAIN;
+ }
+
+ rc = filemap_migrate_folio(mapping, dst, src, mode);
+ if (rc != MIGRATEPAGE_SUCCESS)
+ return rc;
+
+ for (i = 0; i < MPS_PER_PAGE; i++) {
+ mp = src_anchor->mp[i];
+ if (!mp)
+ continue;
+ if (unlikely(insert_metapage(dst, mp))) {
+ /* If error, roll-back previosly inserted pages */
+ for (int j = 0 ; j < i; j++) {
+ if (mps[j])
+ remove_metapage(dst, mps[j]);
+ }
+ return -EAGAIN;
+ }
+ mps[i] = mp;
+ }
+
+ /* Update the metapage and remove it from src */
+ for (i = 0; i < MPS_PER_PAGE; i++) {
+ mp = mps[i];
+ if (mp) {
+ int page_offset = mp->data - folio_address(src);
+
+ mp->data = folio_address(dst) + page_offset;
+ mp->folio = dst;
+ remove_metapage(src, mp);
+ }
+ }
+
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif /* CONFIG_MIGRATION */
+
#else
+
static inline struct metapage *folio_to_mp(struct folio *folio, int offset)
{
return folio->private;
@@ -175,6 +228,35 @@ static inline void remove_metapage(struct folio *folio, struct metapage *mp)
#define inc_io(folio) do {} while(0)
#define dec_io(folio, status, handler) handler(folio, status)
+#ifdef CONFIG_MIGRATION
+static int __metapage_migrate_folio(struct address_space *mapping,
+ struct folio *dst, struct folio *src,
+ enum migrate_mode mode)
+{
+ struct metapage *mp;
+ int page_offset;
+ int rc;
+
+ mp = folio_to_mp(src, 0);
+ if (metapage_locked(mp))
+ return -EAGAIN;
+
+ rc = filemap_migrate_folio(mapping, dst, src, mode);
+ if (rc != MIGRATEPAGE_SUCCESS)
+ return rc;
+
+ if (unlikely(insert_metapage(dst, mp)))
+ return -EAGAIN;
+
+ page_offset = mp->data - folio_address(src);
+ mp->data = folio_address(dst) + page_offset;
+ mp->folio = dst;
+ remove_metapage(src, mp);
+
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif /* CONFIG_MIGRATION */
+
#endif
static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
@@ -554,6 +636,29 @@ static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask)
return ret;
}
+#ifdef CONFIG_MIGRATION
+/*
+ * metapage_migrate_folio - Migration function for JFS metapages
+ */
+static int metapage_migrate_folio(struct address_space *mapping,
+ struct folio *dst, struct folio *src,
+ enum migrate_mode mode)
+{
+ int expected_count;
+
+ if (!src->private)
+ return filemap_migrate_folio(mapping, dst, src, mode);
+
+ /* Check whether page does not have extra refs before we do more work */
+ expected_count = folio_expected_ref_count(src) + 1;
+ if (folio_ref_count(src) != expected_count)
+ return -EAGAIN;
+ return __metapage_migrate_folio(mapping, dst, src, mode);
+}
+#else
+#define metapage_migrate_folio NULL
+#endif /* CONFIG_MIGRATION */
+
static void metapage_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
@@ -570,6 +675,7 @@ const struct address_space_operations jfs_metapage_aops = {
.release_folio = metapage_release_folio,
.invalidate_folio = metapage_invalidate_folio,
.dirty_folio = filemap_dirty_folio,
+ .migrate_folio = metapage_migrate_folio,
};
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
diff --git a/fs/mount.h b/fs/mount.h
index 7aecf2a60472..ad7173037924 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -7,10 +7,6 @@
extern struct list_head notify_list;
-typedef __u32 __bitwise mntns_flags_t;
-
-#define MNTNS_PROPAGATING ((__force mntns_flags_t)(1 << 0))
-
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
@@ -37,7 +33,6 @@ struct mnt_namespace {
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
refcount_t passive; /* number references not pinning @mounts */
- mntns_flags_t mntns_flags;
} __randomize_layout;
struct mnt_pcp {
diff --git a/fs/namespace.c b/fs/namespace.c
index 552ad7f4d18b..2f2e93927f46 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1326,21 +1326,6 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type,
}
EXPORT_SYMBOL_GPL(vfs_kern_mount);
-struct vfsmount *
-vfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
- const char *name, void *data)
-{
- /* Until it is worked out how to pass the user namespace
- * through from the parent mount to the submount don't support
- * unprivileged mounts with submounts.
- */
- if (mountpoint->d_sb->s_user_ns != &init_user_ns)
- return ERR_PTR(-EPERM);
-
- return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
-}
-EXPORT_SYMBOL_GPL(vfs_submount);
-
static struct mount *clone_mnt(struct mount *old, struct dentry *root,
int flag)
{
@@ -3649,7 +3634,7 @@ static int do_move_mount(struct path *old_path,
if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
goto out;
- if (is_anon_ns(ns)) {
+ if (is_anon_ns(ns) && ns == p->mnt_ns) {
/*
* Ending up with two files referring to the root of the
* same anonymous mount namespace would cause an error
@@ -3657,16 +3642,7 @@ static int do_move_mount(struct path *old_path,
* twice into the mount tree which would be rejected
* later. But be explicit about it right here.
*/
- if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns))
- goto out;
-
- /*
- * If this is an anonymous mount tree ensure that mount
- * propagation can detect mounts that were just
- * propagated to the target mount tree so we don't
- * propagate onto them.
- */
- ns->mntns_flags |= MNTNS_PROPAGATING;
+ goto out;
} else if (is_anon_ns(p->mnt_ns)) {
/*
* Don't allow moving an attached mount tree to an
@@ -3723,8 +3699,6 @@ static int do_move_mount(struct path *old_path,
if (attached)
put_mountpoint(old_mp);
out:
- if (is_anon_ns(ns))
- ns->mntns_flags &= ~MNTNS_PROPAGATING;
unlock_mount(mp);
if (!err) {
if (attached) {
@@ -3900,10 +3874,6 @@ int finish_automount(struct vfsmount *m, const struct path *path)
return PTR_ERR(m);
mnt = real_mount(m);
- /* The new mount record should have at least 2 refs to prevent it being
- * expired before we get a chance to add it
- */
- BUG_ON(mnt_get_count(mnt) < 2);
if (m->mnt_sb == path->mnt->mnt_sb &&
m->mnt_root == dentry) {
@@ -3936,7 +3906,6 @@ int finish_automount(struct vfsmount *m, const struct path *path)
unlock_mount(mp);
if (unlikely(err))
goto discard;
- mntput(m);
return 0;
discard_locked:
@@ -3950,7 +3919,6 @@ discard:
namespace_unlock();
}
mntput(m);
- mntput(m);
return err;
}
@@ -3987,11 +3955,14 @@ void mark_mounts_for_expiry(struct list_head *mounts)
/* extract from the expiration list every vfsmount that matches the
* following criteria:
+ * - already mounted
* - only referenced by its parent vfsmount
* - still marked for expiry (marked on the last call here; marks are
* cleared by mntput())
*/
list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
+ if (!is_mounted(&mnt->mnt))
+ continue;
if (!xchg(&mnt->mnt_expiry_mark, 1) ||
propagate_mount_busy(mnt, 1))
continue;
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 0d1b6d35ff3b..18b3dc74c70e 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -78,7 +78,8 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
* [!] NOTE: This must be run in the same thread as ->issue_read() was called
* in as we access the readahead_control struct.
*/
-static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
+static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,
+ struct readahead_control *ractl)
{
struct netfs_io_request *rreq = subreq->rreq;
size_t rsize = subreq->len;
@@ -86,7 +87,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);
- if (rreq->ractl) {
+ if (ractl) {
/* If we don't have sufficient folios in the rolling buffer,
* extract a folioq's worth from the readahead region at a time
* into the buffer. Note that this acquires a ref on each page
@@ -99,7 +100,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
while (rreq->submitted < subreq->start + rsize) {
ssize_t added;
- added = rolling_buffer_load_from_ra(&rreq->buffer, rreq->ractl,
+ added = rolling_buffer_load_from_ra(&rreq->buffer, ractl,
&put_batch);
if (added < 0)
return added;
@@ -211,7 +212,8 @@ static void netfs_issue_read(struct netfs_io_request *rreq,
* slicing up the region to be read according to available cache blocks and
* network rsize.
*/
-static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
+static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
+ struct readahead_control *ractl)
{
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned long long start = rreq->start;
@@ -262,9 +264,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
if (ret < 0) {
subreq->error = ret;
/* Not queued - release both refs. */
- netfs_put_subrequest(subreq, false,
+ netfs_put_subrequest(subreq,
netfs_sreq_trace_put_cancel);
- netfs_put_subrequest(subreq, false,
+ netfs_put_subrequest(subreq,
netfs_sreq_trace_put_cancel);
break;
}
@@ -291,14 +293,14 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
break;
issue:
- slice = netfs_prepare_read_iterator(subreq);
+ slice = netfs_prepare_read_iterator(subreq, ractl);
if (slice < 0) {
ret = slice;
subreq->error = ret;
trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
/* Not queued - release both refs. */
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
break;
}
size -= slice;
@@ -312,7 +314,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
if (unlikely(size > 0)) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
/* Defer error return as we may need to wait for outstanding I/O. */
@@ -359,18 +361,15 @@ void netfs_readahead(struct readahead_control *ractl)
netfs_rreq_expand(rreq, ractl);
- rreq->ractl = ractl;
rreq->submitted = rreq->start;
if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
goto cleanup_free;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, ractl);
- netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
- return;
+ return netfs_put_request(rreq, netfs_rreq_trace_put_return);
cleanup_free:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
- return;
+ return netfs_put_request(rreq, netfs_rreq_trace_put_failed);
}
EXPORT_SYMBOL(netfs_readahead);
@@ -389,7 +388,6 @@ static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct fo
if (added < 0)
return added;
rreq->submitted = rreq->start + added;
- rreq->ractl = (struct readahead_control *)1UL;
return 0;
}
@@ -459,7 +457,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
rreq->submitted = rreq->start + flen;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, NULL);
if (sink)
folio_put(sink);
@@ -470,11 +468,11 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
folio_mark_uptodate(folio);
}
folio_unlock(folio);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
discard:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
alloc_error:
folio_unlock(folio);
return ret;
@@ -528,13 +526,13 @@ int netfs_read_folio(struct file *file, struct folio *folio)
if (ret < 0)
goto discard;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, NULL);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
discard:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
alloc_error:
folio_unlock(folio);
return ret;
@@ -685,11 +683,11 @@ retry:
if (ret < 0)
goto error_put;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, NULL);
ret = netfs_wait_for_read(rreq);
if (ret < 0)
goto error;
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
have_folio:
ret = folio_wait_private_2_killable(folio);
@@ -701,7 +699,7 @@ have_folio_no_wait:
return 0;
error_put:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(rreq, netfs_rreq_trace_put_failed);
error:
if (folio) {
folio_unlock(folio);
@@ -750,13 +748,13 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
if (ret < 0)
goto error_put;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, NULL);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
error_put:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
error:
_leave(" = %d", ret);
return ret;
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index b4826360a411..72a3e6db2524 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -115,8 +115,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
size_t max_chunk = mapping_max_folio_size(mapping);
bool maybe_trouble = false;
- if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
- iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
+ if (unlikely(iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
) {
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
@@ -386,7 +385,7 @@ out:
wbc_detach_inode(&wbc);
if (ret2 == -EIOCBQUEUED)
return ret2;
- if (ret == 0)
+ if (ret == 0 && ret2 < 0)
ret = ret2;
}
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 5e3f0aeb51f3..a05e13472baf 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -85,7 +85,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
break;
}
}
@@ -103,19 +103,16 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
rreq->netfs_ops->issue_read(subreq);
if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
- netfs_wait_for_pause(rreq);
+ netfs_wait_for_paused_read(rreq);
if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
break;
- if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
- test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
- break;
cond_resched();
} while (size > 0);
if (unlikely(size > 0)) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
return ret;
@@ -144,7 +141,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
ret = netfs_dispatch_unbuffered_reads(rreq);
if (!rreq->submitted) {
- netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+ netfs_put_request(rreq, netfs_rreq_trace_put_no_submit);
inode_dio_end(rreq->inode);
ret = 0;
goto out;
@@ -188,7 +185,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
iocb->ki_pos, orig_count,
- NETFS_DIO_READ);
+ iocb->ki_flags & IOCB_DIRECT ?
+ NETFS_DIO_READ : NETFS_UNBUFFERED_READ);
if (IS_ERR(rreq))
return PTR_ERR(rreq);
@@ -236,7 +234,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
}
out:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
if (ret > 0)
orig_count -= ret;
return ret;
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 42ce53cc216e..fa9a5bf3c6d5 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -87,6 +87,8 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
}
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
+ if (async)
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
/* Copy the data into the bounce buffer and encrypt it. */
// TODO
@@ -105,19 +107,15 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
if (!async) {
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
- wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- ret = wreq->error;
- if (ret == 0) {
- ret = wreq->transferred;
+ ret = netfs_wait_for_write(wreq);
+ if (ret > 0)
iocb->ki_pos += ret;
- }
} else {
ret = -EIOCBQUEUED;
}
out:
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked);
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index b1722a82c03d..e4308457633c 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -192,8 +192,7 @@ EXPORT_SYMBOL(__fscache_clear_page_bits);
/*
* Deal with the completion of writing the data to the cache.
*/
-static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
- bool was_async)
+static void fscache_wreq_done(void *priv, ssize_t transferred_or_error)
{
struct fscache_write_request *wreq = priv;
@@ -202,8 +201,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
wreq->set_bits);
if (wreq->term_func)
- wreq->term_func(wreq->term_func_priv, transferred_or_error,
- was_async);
+ wreq->term_func(wreq->term_func_priv, transferred_or_error);
fscache_end_operation(&wreq->cache_resources);
kfree(wreq);
}
@@ -255,14 +253,14 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
return;
abandon_end:
- return fscache_wreq_done(wreq, ret, false);
+ return fscache_wreq_done(wreq, ret);
abandon_free:
kfree(wreq);
abandon:
if (using_pgpriv2)
fscache_clear_page_bits(mapping, start, len, cond);
if (term_func)
- term_func(term_func_priv, ret, false);
+ term_func(term_func_priv, ret);
}
EXPORT_SYMBOL(__fscache_write_to_cache);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 1c4f953c3d68..e2ee9183392b 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -23,7 +23,7 @@
/*
* buffered_read.c
*/
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
@@ -62,6 +62,14 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq,
enum netfs_folioq_trace trace);
void netfs_reset_iter(struct netfs_io_subrequest *subreq);
+void netfs_wake_collector(struct netfs_io_request *rreq);
+void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq);
+void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
+ struct netfs_io_stream *stream);
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
+ssize_t netfs_wait_for_write(struct netfs_io_request *rreq);
+void netfs_wait_for_paused_read(struct netfs_io_request *rreq);
+void netfs_wait_for_paused_write(struct netfs_io_request *rreq);
/*
* objects.c
@@ -71,9 +79,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
loff_t start, size_t len,
enum netfs_io_origin origin);
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
-void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async);
-void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
- enum netfs_rreq_ref_trace what);
+void netfs_clear_subrequests(struct netfs_io_request *rreq);
+void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq);
static inline void netfs_see_request(struct netfs_io_request *rreq,
@@ -92,11 +99,9 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq,
/*
* read_collect.c
*/
+bool netfs_read_collection(struct netfs_io_request *rreq);
void netfs_read_collection_worker(struct work_struct *work);
-void netfs_wake_read_collector(struct netfs_io_request *rreq);
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
-ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
-void netfs_wait_for_pause(struct netfs_io_request *rreq);
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
/*
* read_pgpriv2.c
@@ -176,8 +181,8 @@ static inline void netfs_stat_d(atomic_t *stat)
* write_collect.c
*/
int netfs_folio_written_back(struct folio *folio);
+bool netfs_write_collection(struct netfs_io_request *wreq);
void netfs_write_collection_worker(struct work_struct *work);
-void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async);
/*
* write_issue.c
@@ -198,8 +203,8 @@ struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache);
-int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
- struct folio *writethrough_cache);
+ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *writethrough_cache);
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
/*
@@ -255,6 +260,21 @@ static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
}
/*
+ * Clear and wake up a NETFS_RREQ_* flag bit on a request.
+ */
+static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq,
+ unsigned int rreq_flag,
+ enum netfs_rreq_trace trace)
+{
+ if (test_bit(rreq_flag, &rreq->flags)) {
+ trace_netfs_rreq(rreq, trace);
+ clear_bit_unlock(rreq_flag, &rreq->flags);
+ smp_mb__after_atomic(); /* Set flag before task state */
+ wake_up(&rreq->waitq);
+ }
+}
+
+/*
* fscache-cache.c
*/
#ifdef CONFIG_PROC_FS
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 70ecc8f5f210..3db401d269e7 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READ_GAPS] = "RG",
[NETFS_READ_SINGLE] = "R1",
[NETFS_READ_FOR_WRITE] = "RW",
+ [NETFS_UNBUFFERED_READ] = "UR",
[NETFS_DIO_READ] = "DR",
[NETFS_WRITEBACK] = "WB",
[NETFS_WRITEBACK_SINGLE] = "W1",
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 7099aa07737a..43b67a28a8fa 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -313,3 +313,222 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp)
return true;
}
EXPORT_SYMBOL(netfs_release_folio);
+
+/*
+ * Wake the collection work item.
+ */
+void netfs_wake_collector(struct netfs_io_request *rreq)
+{
+ if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
+ !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
+ queue_work(system_unbound_wq, &rreq->work);
+ } else {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
+ wake_up(&rreq->waitq);
+ }
+}
+
+/*
+ * Mark a subrequest as no longer being in progress and, if need be, wake the
+ * collector.
+ */
+void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *rreq = subreq->rreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr];
+
+ clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
+
+ /* If we are at the head of the queue, wake up the collector. */
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
+ test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
+ netfs_wake_collector(rreq);
+}
+
+/*
+ * Wait for all outstanding I/O in a stream to quiesce.
+ */
+void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
+ struct netfs_io_stream *stream)
+{
+ struct netfs_io_subrequest *subreq;
+ DEFINE_WAIT(myself);
+
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ continue;
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ for (;;) {
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ break;
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+}
+
+/*
+ * Perform collection in app thread if not offloaded to workqueue.
+ */
+static int netfs_collect_in_app(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ bool need_collect = false, inactive = true;
+
+ for (int i = 0; i < NR_IO_STREAMS; i++) {
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[i];
+
+ if (!stream->active)
+ continue;
+ inactive = false;
+ trace_netfs_collect_stream(rreq, stream);
+ subreq = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest,
+ rreq_link);
+ if (subreq &&
+ (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
+ need_collect = true;
+ break;
+ }
+ }
+
+ if (!need_collect && !inactive)
+ return 0; /* Sleep */
+
+ __set_current_state(TASK_RUNNING);
+ if (collector(rreq)) {
+ /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ return 1; /* Done */
+ }
+
+ if (inactive) {
+ WARN(true, "Failed to collect inactive req R=%08x\n",
+ rreq->debug_id);
+ cond_resched();
+ }
+ return 2; /* Again */
+}
+
+/*
+ * Wait for a request to complete, successfully or otherwise.
+ */
+static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ DEFINE_WAIT(myself);
+ ssize_t ret;
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ switch (netfs_collect_in_app(rreq, collector)) {
+ case 0:
+ break;
+ case 1:
+ goto all_collected;
+ case 2:
+ continue;
+ }
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+all_collected:
+ finish_wait(&rreq->waitq, &myself);
+
+ ret = rreq->error;
+ if (ret == 0) {
+ ret = rreq->transferred;
+ switch (rreq->origin) {
+ case NETFS_DIO_READ:
+ case NETFS_DIO_WRITE:
+ case NETFS_READ_SINGLE:
+ case NETFS_UNBUFFERED_READ:
+ case NETFS_UNBUFFERED_WRITE:
+ break;
+ default:
+ if (rreq->submitted < rreq->len) {
+ trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
+ ret = -EIO;
+ }
+ break;
+ }
+ }
+
+ return ret;
+}
+
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_request(rreq, netfs_read_collection);
+}
+
+ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_request(rreq, netfs_write_collection);
+}
+
+/*
+ * Wait for a paused operation to unpause or complete in some manner.
+ */
+static void netfs_wait_for_pause(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ DEFINE_WAIT(myself);
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ switch (netfs_collect_in_app(rreq, collector)) {
+ case 0:
+ break;
+ case 1:
+ goto all_collected;
+ case 2:
+ continue;
+ }
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+all_collected:
+ finish_wait(&rreq->waitq, &myself);
+}
+
+void netfs_wait_for_paused_read(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_pause(rreq, netfs_read_collection);
+}
+
+void netfs_wait_for_paused_write(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_pause(rreq, netfs_write_collection);
+}
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index dc6b41ef18b0..e8c99738b5bb 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -10,6 +10,8 @@
#include <linux/delay.h>
#include "internal.h"
+static void netfs_free_request(struct work_struct *work);
+
/*
* Allocate an I/O request and initialise it.
*/
@@ -34,6 +36,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
memset(rreq, 0, kmem_cache_size(cache));
+ INIT_WORK(&rreq->cleanup_work, netfs_free_request);
rreq->start = start;
rreq->len = len;
rreq->origin = origin;
@@ -49,13 +52,14 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
init_waitqueue_head(&rreq->waitq);
- refcount_set(&rreq->ref, 1);
+ refcount_set(&rreq->ref, 2);
if (origin == NETFS_READAHEAD ||
origin == NETFS_READPAGE ||
origin == NETFS_READ_GAPS ||
origin == NETFS_READ_SINGLE ||
origin == NETFS_READ_FOR_WRITE ||
+ origin == NETFS_UNBUFFERED_READ ||
origin == NETFS_DIO_READ) {
INIT_WORK(&rreq->work, netfs_read_collection_worker);
rreq->io_streams[0].avail = true;
@@ -64,8 +68,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- if (file && file->f_flags & O_NONBLOCK)
- __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
if (rreq->netfs_ops->init_request) {
ret = rreq->netfs_ops->init_request(rreq, file);
if (ret < 0) {
@@ -75,7 +77,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
atomic_inc(&ctx->io_count);
- trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
+ trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), netfs_rreq_trace_new);
netfs_proc_add_rreq(rreq);
netfs_stat(&netfs_n_rh_rreq);
return rreq;
@@ -89,7 +91,7 @@ void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace
trace_netfs_rreq_ref(rreq->debug_id, r + 1, what);
}
-void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
+void netfs_clear_subrequests(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream;
@@ -101,8 +103,7 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
subreq = list_first_entry(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, was_async,
- netfs_sreq_trace_put_clear);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_clear);
}
}
}
@@ -118,13 +119,19 @@ static void netfs_free_request_rcu(struct rcu_head *rcu)
static void netfs_free_request(struct work_struct *work)
{
struct netfs_io_request *rreq =
- container_of(work, struct netfs_io_request, work);
+ container_of(work, struct netfs_io_request, cleanup_work);
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned int i;
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
+
+ /* Cancel/flush the result collection worker. That does not carry a
+ * ref of its own, so we must wait for it somewhere.
+ */
+ cancel_work_sync(&rreq->work);
+
netfs_proc_del_rreq(rreq);
- netfs_clear_subrequests(rreq, false);
+ netfs_clear_subrequests(rreq);
if (rreq->netfs_ops->free_request)
rreq->netfs_ops->free_request(rreq);
if (rreq->cache_resources.ops)
@@ -145,8 +152,7 @@ static void netfs_free_request(struct work_struct *work)
call_rcu(&rreq->rcu, netfs_free_request_rcu);
}
-void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
- enum netfs_rreq_ref_trace what)
+void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what)
{
unsigned int debug_id;
bool dead;
@@ -156,15 +162,8 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
debug_id = rreq->debug_id;
dead = __refcount_dec_and_test(&rreq->ref, &r);
trace_netfs_rreq_ref(debug_id, r - 1, what);
- if (dead) {
- if (was_async) {
- rreq->work.func = netfs_free_request;
- if (!queue_work(system_unbound_wq, &rreq->work))
- WARN_ON(1);
- } else {
- netfs_free_request(&rreq->work);
- }
- }
+ if (dead)
+ WARN_ON(!queue_work(system_unbound_wq, &rreq->cleanup_work));
}
}
@@ -206,8 +205,7 @@ void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
what);
}
-static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
- bool was_async)
+static void netfs_free_subrequest(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
@@ -216,10 +214,10 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
rreq->netfs_ops->free_subrequest(subreq);
mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool);
netfs_stat_d(&netfs_n_rh_sreq);
- netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
+ netfs_put_request(rreq, netfs_rreq_trace_put_subreq);
}
-void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async,
+void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what)
{
unsigned int debug_index = subreq->debug_index;
@@ -230,5 +228,5 @@ void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async,
dead = __refcount_dec_and_test(&subreq->ref, &r);
trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what);
if (dead)
- netfs_free_subrequest(subreq, was_async);
+ netfs_free_subrequest(subreq);
}
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 23c75755ad4e..96ee18af28ef 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -83,14 +83,12 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
}
just_unlock:
- if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
- if (folio->index == rreq->no_unlock_folio &&
- test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
- _debug("no unlock");
- } else {
- trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
- folio_unlock(folio);
- }
+ if (folio->index == rreq->no_unlock_folio &&
+ test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
+ _debug("no unlock");
+ } else {
+ trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
+ folio_unlock(folio);
}
folioq_clear(folioq, slot);
@@ -280,9 +278,13 @@ reassess:
stream->need_retry = true;
notes |= NEED_RETRY | MADE_PROGRESS;
break;
+ } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
+ notes |= MADE_PROGRESS;
} else {
if (!stream->failed)
- stream->transferred = stream->collected_to - rreq->start;
+ stream->transferred += transferred;
+ if (front->transferred < front->len)
+ set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
notes |= MADE_PROGRESS;
}
@@ -297,7 +299,7 @@ reassess:
struct netfs_io_subrequest, rreq_link);
stream->front = front;
spin_unlock(&rreq->lock);
- netfs_put_subrequest(remove, false,
+ netfs_put_subrequest(remove,
notes & ABANDON_SREQ ?
netfs_sreq_trace_put_abandon :
netfs_sreq_trace_put_done);
@@ -311,14 +313,8 @@ reassess:
if (notes & NEED_RETRY)
goto need_retry;
- if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_unpause);
- clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags);
- smp_mb__after_atomic(); /* Set PAUSE before task state */
- wake_up(&rreq->waitq);
- }
-
if (notes & MADE_PROGRESS) {
+ netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
//cond_resched();
goto reassess;
}
@@ -342,24 +338,10 @@ need_retry:
*/
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned int i;
- /* Collect unbuffered reads and direct reads, adding up the transfer
- * sizes until we find the first short or failed subrequest.
- */
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- rreq->transferred += subreq->transferred;
-
- if (subreq->transferred < subreq->len ||
- test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
- rreq->error = subreq->error;
- break;
- }
- }
-
- if (rreq->origin == NETFS_DIO_READ) {
+ if (rreq->origin == NETFS_UNBUFFERED_READ ||
+ rreq->origin == NETFS_DIO_READ) {
for (i = 0; i < rreq->direct_bv_count; i++) {
flush_dcache_page(rreq->direct_bv[i].bv_page);
// TODO: cifs marks pages in the destination buffer
@@ -377,7 +359,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
- if (rreq->origin == NETFS_DIO_READ)
+ if (rreq->origin == NETFS_UNBUFFERED_READ ||
+ rreq->origin == NETFS_DIO_READ)
inode_dio_end(rreq->inode);
}
@@ -410,7 +393,7 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
* Note that we're in normal kernel thread context at this point, possibly
* running on a workqueue.
*/
-static void netfs_read_collection(struct netfs_io_request *rreq)
+bool netfs_read_collection(struct netfs_io_request *rreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
@@ -420,11 +403,11 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
* queue is empty.
*/
if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
- return;
+ return false;
smp_rmb(); /* Read ALL_QUEUED before subreq lists. */
if (!list_empty(&stream->subrequests))
- return;
+ return false;
/* Okay, declare that all I/O is complete. */
rreq->transferred = stream->transferred;
@@ -433,6 +416,7 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
//netfs_rreq_is_still_valid(rreq);
switch (rreq->origin) {
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
case NETFS_READ_GAPS:
netfs_rreq_assess_dio(rreq);
@@ -445,14 +429,15 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
}
task_io_account_read(rreq->transferred);
- trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
- clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+ netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
+ /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
trace_netfs_rreq(rreq, netfs_rreq_trace_done);
- netfs_clear_subrequests(rreq, false);
+ netfs_clear_subrequests(rreq);
netfs_unlock_abandoned_read_pages(rreq);
if (unlikely(rreq->copy_to_cache))
netfs_pgpriv2_end_copy_to_cache(rreq);
+ return true;
}
void netfs_read_collection_worker(struct work_struct *work)
@@ -460,26 +445,12 @@ void netfs_read_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
- netfs_read_collection(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_work);
-}
-
-/*
- * Wake the collection work item.
- */
-void netfs_wake_read_collector(struct netfs_io_request *rreq)
-{
- if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
- !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
- if (!work_pending(&rreq->work)) {
- netfs_get_request(rreq, netfs_rreq_trace_get_work);
- if (!queue_work(system_unbound_wq, &rreq->work))
- netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq);
- }
- } else {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
- wake_up(&rreq->waitq);
+ if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_read_collection(rreq))
+ /* Drop the ref from the IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ else
+ netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
}
}
@@ -511,7 +482,7 @@ void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
list_is_first(&subreq->rreq_link, &stream->subrequests)
) {
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
@@ -535,7 +506,6 @@ EXPORT_SYMBOL(netfs_read_subreq_progress);
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
switch (subreq->source) {
case NETFS_READ_FROM_CACHE:
@@ -582,23 +552,15 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
}
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
-
- /* If we are at the head of the queue, wake up the collector. */
- if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
- test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
- netfs_wake_read_collector(rreq);
-
- netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
+ netfs_subreq_clear_in_progress(subreq);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
/*
* Handle termination of a read from the cache.
*/
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async)
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
{
struct netfs_io_subrequest *subreq = priv;
@@ -613,94 +575,3 @@ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool
}
netfs_read_subreq_terminated(subreq);
}
-
-/*
- * Wait for the read operation to complete, successfully or otherwise.
- */
-ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
- ssize_t ret;
-
- for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- subreq = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
- if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
- __set_current_state(TASK_RUNNING);
- netfs_read_collection(rreq);
- continue;
- }
-
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
- break;
-
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
-
- ret = rreq->error;
- if (ret == 0) {
- ret = rreq->transferred;
- switch (rreq->origin) {
- case NETFS_DIO_READ:
- case NETFS_READ_SINGLE:
- ret = rreq->transferred;
- break;
- default:
- if (rreq->submitted < rreq->len) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
- }
- break;
- }
- }
-
- return ret;
-}
-
-/*
- * Wait for a paused read operation to unpause or complete in some manner.
- */
-void netfs_wait_for_pause(struct netfs_io_request *rreq)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
-
- for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
- subreq = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
- if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
- __set_current_state(TASK_RUNNING);
- netfs_read_collection(rreq);
- continue;
- }
- }
-
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
- !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
- break;
-
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
-}
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index cf7727060215..5bbe906a551d 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -116,7 +116,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
return creq;
cancel_put:
- netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(creq, netfs_rreq_trace_put_return);
cancel:
rreq->copy_to_cache = ERR_PTR(-ENOBUFS);
clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
@@ -155,7 +155,7 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq)
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags);
- netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(creq, netfs_rreq_trace_put_return);
creq->copy_to_cache = NULL;
}
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 0f294b26e08c..b99e84a8170a 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -173,7 +173,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
}
@@ -257,35 +257,15 @@ abandon:
*/
void netfs_retry_reads(struct netfs_io_request *rreq)
{
- struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
netfs_stat(&netfs_n_rh_retry_read_req);
- set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
-
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
- continue;
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- for (;;) {
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
- break;
-
- trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
- }
+ set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
+ netfs_wait_for_in_progress_stream(rreq, stream);
clear_bit(NETFS_RREQ_RETRYING, &rreq->flags);
trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
index fea0ecdecc53..fa622a6cd56d 100644
--- a/fs/netfs/read_single.c
+++ b/fs/netfs/read_single.c
@@ -142,7 +142,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
return ret;
cancel:
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
return ret;
}
@@ -185,11 +185,11 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite
netfs_single_dispatch_read(rreq);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret;
cleanup_free:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(rreq, netfs_rreq_trace_put_failed);
return ret;
}
EXPORT_SYMBOL(netfs_read_single);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 3fca59e6475d..e2b102ffb768 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -280,7 +280,7 @@ reassess_streams:
struct netfs_io_subrequest, rreq_link);
stream->front = front;
spin_unlock(&wreq->lock);
- netfs_put_subrequest(remove, false,
+ netfs_put_subrequest(remove,
notes & SAW_FAILURE ?
netfs_sreq_trace_put_cancel :
netfs_sreq_trace_put_done);
@@ -321,18 +321,14 @@ reassess_streams:
if (notes & NEED_RETRY)
goto need_retry;
- if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
- clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
- smp_mb__after_atomic(); /* Set PAUSE before task state */
- wake_up(&wreq->waitq);
- }
- if (notes & NEED_REASSESS) {
+ if (notes & MADE_PROGRESS) {
+ netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
//cond_resched();
goto reassess_streams;
}
- if (notes & MADE_PROGRESS) {
+
+ if (notes & NEED_REASSESS) {
//cond_resched();
goto reassess_streams;
}
@@ -356,30 +352,21 @@ need_retry:
/*
* Perform the collection of subrequests, folios and encryption buffers.
*/
-void netfs_write_collection_worker(struct work_struct *work)
+bool netfs_write_collection(struct netfs_io_request *wreq)
{
- struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
struct netfs_inode *ictx = netfs_inode(wreq->inode);
size_t transferred;
int s;
_enter("R=%x", wreq->debug_id);
- netfs_see_request(wreq, netfs_rreq_trace_see_work);
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
-
netfs_collect_write_results(wreq);
/* We're done when the app thread has finished posting subreqs and all
* the queues in all the streams are empty.
*/
- if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
+ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags))
+ return false;
smp_rmb(); /* Read ALL_QUEUED before lists. */
transferred = LONG_MAX;
@@ -387,10 +374,8 @@ void netfs_write_collection_worker(struct work_struct *work)
struct netfs_io_stream *stream = &wreq->io_streams[s];
if (!stream->active)
continue;
- if (!list_empty(&stream->subrequests)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
+ if (!list_empty(&stream->subrequests))
+ return false;
if (stream->transferred < transferred)
transferred = stream->transferred;
}
@@ -428,8 +413,8 @@ void netfs_write_collection_worker(struct work_struct *work)
inode_dio_end(wreq->inode);
_debug("finished");
- trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
- clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
+ netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
+ /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
@@ -440,19 +425,21 @@ void netfs_write_collection_worker(struct work_struct *work)
wreq->iocb = VFS_PTR_POISON;
}
- netfs_clear_subrequests(wreq, false);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete);
+ netfs_clear_subrequests(wreq);
+ return true;
}
-/*
- * Wake the collection work item.
- */
-void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
+void netfs_write_collection_worker(struct work_struct *work)
{
- if (!work_pending(&wreq->work)) {
- netfs_get_request(wreq, netfs_rreq_trace_get_work);
- if (!queue_work(system_unbound_wq, &wreq->work))
- netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq);
+ struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
+
+ netfs_see_request(rreq, netfs_rreq_trace_see_work);
+ if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_write_collection(rreq))
+ /* Drop the ref from the IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ else
+ netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
}
}
@@ -460,7 +447,6 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
* netfs_write_subrequest_terminated - Note the termination of a write operation.
* @_op: The I/O request that has terminated.
* @transferred_or_error: The amount of data transferred or an error code.
- * @was_async: The termination was asynchronous
*
* This tells the library that a contributory write I/O operation has
* terminated, one way or another, and that it should collect the results.
@@ -470,21 +456,16 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
* negative error code. The library will look after reissuing I/O operations
* as appropriate and writing downloaded data to the cache.
*
- * If @was_async is true, the caller might be running in softirq or interrupt
- * context and we can't sleep.
- *
* When this is called, ownership of the subrequest is transferred back to the
* library, along with a ref.
*
* Note that %_op is a void* so that the function can be passed to
* kiocb::term_func without the need for a casting wrapper.
*/
-void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
- bool was_async)
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error)
{
struct netfs_io_subrequest *subreq = _op;
struct netfs_io_request *wreq = subreq->rreq;
- struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
@@ -495,8 +476,6 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
case NETFS_WRITE_TO_CACHE:
netfs_stat(&netfs_n_wh_write_done);
break;
- case NETFS_INVALID_WRITE:
- break;
default:
BUG();
}
@@ -536,15 +515,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
}
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- clear_and_wake_up_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
-
- /* If we are at the head of the queue, wake up the collector,
- * transferring a ref to it if we were the ones to do so.
- */
- if (list_is_first(&subreq->rreq_link, &stream->subrequests))
- netfs_wake_write_collector(wreq, was_async);
-
- netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+ netfs_subreq_clear_in_progress(subreq);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 77279fc5b5a7..50bee2c4130d 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -134,7 +134,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
return wreq;
nomem:
wreq->error = -ENOMEM;
- netfs_put_request(wreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(wreq, netfs_rreq_trace_put_failed);
return ERR_PTR(-ENOMEM);
}
@@ -233,7 +233,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
- return netfs_write_subrequest_terminated(subreq, subreq->error, false);
+ return netfs_write_subrequest_terminated(subreq, subreq->error);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
stream->issue_write(subreq);
@@ -542,7 +542,7 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq)
}
if (needs_poke)
- netfs_wake_write_collector(wreq, false);
+ netfs_wake_collector(wreq);
}
/*
@@ -576,6 +576,7 @@ int netfs_writepages(struct address_space *mapping,
goto couldnt_start;
}
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
trace_netfs_write(wreq, netfs_write_trace_writeback);
netfs_stat(&netfs_n_wh_writepages);
@@ -599,8 +600,9 @@ int netfs_writepages(struct address_space *mapping,
netfs_end_issue_write(wreq);
mutex_unlock(&ictx->wb_lock);
+ netfs_wake_collector(wreq);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
_leave(" = %d", error);
return error;
@@ -673,11 +675,11 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
/*
* End a write operation used when writing through the pagecache.
*/
-int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
- struct folio *writethrough_cache)
+ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *writethrough_cache)
{
struct netfs_inode *ictx = netfs_inode(wreq->inode);
- int ret;
+ ssize_t ret;
_enter("R=%x", wreq->debug_id);
@@ -688,13 +690,11 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
mutex_unlock(&ictx->wb_lock);
- if (wreq->iocb) {
+ if (wreq->iocb)
ret = -EIOCBQUEUED;
- } else {
- wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
- ret = wreq->error;
- }
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ else
+ ret = netfs_wait_for_write(wreq);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
}
@@ -722,10 +722,8 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
start += part;
len -= part;
rolling_buffer_advance(&wreq->buffer, part);
- if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
- wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags));
- }
+ if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags))
+ netfs_wait_for_paused_write(wreq);
if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
break;
}
@@ -885,7 +883,8 @@ int netfs_writeback_single(struct address_space *mapping,
goto couldnt_start;
}
- trace_netfs_write(wreq, netfs_write_trace_writeback);
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
+ trace_netfs_write(wreq, netfs_write_trace_writeback_single);
netfs_stat(&netfs_n_wh_writepages);
if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
@@ -914,8 +913,9 @@ stop:
set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
mutex_unlock(&ictx->wb_lock);
+ netfs_wake_collector(wreq);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
_leave(" = %d", ret);
return ret;
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index 545d33079a77..9d1d8a8bab72 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -39,9 +39,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
break;
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
- struct iov_iter source = subreq->io_iter;
+ struct iov_iter source;
- iov_iter_revert(&source, subreq->len - source.count);
+ netfs_reset_iter(subreq);
+ source = subreq->io_iter;
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_write(stream, subreq, &source);
}
@@ -131,7 +132,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
}
@@ -199,7 +200,6 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
*/
void netfs_retry_writes(struct netfs_io_request *wreq)
{
- struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream;
int s;
@@ -208,16 +208,13 @@ void netfs_retry_writes(struct netfs_io_request *wreq)
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
+ set_bit(NETFS_RREQ_RETRYING, &wreq->flags);
for (s = 0; s < NR_IO_STREAMS; s++) {
stream = &wreq->io_streams[s];
- if (!stream->active)
- continue;
-
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- }
+ if (stream->active)
+ netfs_wait_for_in_progress_stream(wreq, stream);
}
+ clear_bit(NETFS_RREQ_RETRYING, &wreq->flags);
// TODO: Enc: Fetch changed partial pages
// TODO: Enc: Reencrypt content if needed.
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 6d63b958c4bb..cf35ad3f818a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -180,7 +180,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_proto = cl_init->proto;
clp->cl_nconnect = cl_init->nconnect;
clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
- clp->cl_net = get_net(cl_init->net);
+ clp->cl_net = get_net_track(cl_init->net, &clp->cl_ns_tracker, GFP_KERNEL);
#if IS_ENABLED(CONFIG_NFS_LOCALIO)
seqlock_init(&clp->cl_boot_lock);
@@ -250,7 +250,7 @@ void nfs_free_client(struct nfs_client *clp)
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
- put_net(clp->cl_net);
+ put_net_track(clp->cl_net, &clp->cl_ns_tracker);
put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
kfree(clp->cl_acceptor);
@@ -439,7 +439,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
spin_unlock(&nn->nfs_client_lock);
new = rpc_ops->init_client(new, cl_init);
if (!IS_ERR(new))
- nfs_local_probe(new);
+ nfs_local_probe_async(new);
return new;
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 8bdbc4dca89c..10ef46e29b25 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -1021,13 +1021,6 @@ out:
nfs_inode_find_state_and_recover(inode, stateid);
}
-void nfs_remove_bad_delegation(struct inode *inode,
- const nfs4_stateid *stateid)
-{
- nfs_revoke_delegation(inode, stateid);
-}
-EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
-
void nfs_delegation_mark_returned(struct inode *inode,
const nfs4_stateid *stateid)
{
@@ -1070,6 +1063,24 @@ out_rcu_unlock:
}
/**
+ * nfs_remove_bad_delegation - handle delegations that are unusable
+ * @inode: inode to process
+ * @stateid: the delegation's stateid
+ *
+ * If the server ACK-ed our FREE_STATEID then clean
+ * up the delegation, else mark and keep the revoked state.
+ */
+void nfs_remove_bad_delegation(struct inode *inode,
+ const nfs4_stateid *stateid)
+{
+ if (stateid && stateid->type == NFS4_FREED_STATEID_TYPE)
+ nfs_delegation_mark_returned(inode, stateid);
+ else
+ nfs_revoke_delegation(inode, stateid);
+}
+EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation);
+
+/**
* nfs_expire_unused_delegation_types
* @clp: client to process
* @flags: delegation types to expire
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index e6909cafab68..df4807460596 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1129,6 +1129,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
break;
case -NFS4ERR_DELAY:
+ nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
+ fallthrough;
case -NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
break;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 4a304cf17c4b..656d5c50bbce 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -400,7 +400,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
* keep ds_clp even if DS is local, so that if local IO cannot
* proceed somehow, we can fall back to NFS whenever we want.
*/
- nfs_local_probe(ds->ds_clp);
+ nfs_local_probe_async(ds->ds_clp);
max_payload =
nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
NULL);
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index e278a1ad1ca3..8b0785178731 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
sreq = netfs->sreq;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+ sreq->rreq->origin != NETFS_UNBUFFERED_READ &&
sreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 119e447758b9..8ab7868807a7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -557,6 +557,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK);
+ else
+ set_nlink(inode, 1);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
@@ -633,6 +635,34 @@ nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr)
}
}
+static void nfs_set_timestamps_to_ts(struct inode *inode, struct iattr *attr)
+{
+ unsigned int cache_flags = 0;
+
+ if (attr->ia_valid & ATTR_MTIME_SET) {
+ struct timespec64 ctime = inode_get_ctime(inode);
+ struct timespec64 mtime = inode_get_mtime(inode);
+ struct timespec64 now;
+ int updated = 0;
+
+ now = inode_set_ctime_current(inode);
+ if (!timespec64_equal(&now, &ctime))
+ updated |= S_CTIME;
+
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
+ if (!timespec64_equal(&now, &mtime))
+ updated |= S_MTIME;
+
+ inode_maybe_inc_iversion(inode, updated);
+ cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ }
+ if (attr->ia_valid & ATTR_ATIME_SET) {
+ inode_set_atime_to_ts(inode, attr->ia_atime);
+ cache_flags |= NFS_INO_INVALID_ATIME;
+ }
+ NFS_I(inode)->cache_validity &= ~cache_flags;
+}
+
static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid)
{
enum file_time_flags time_flags = 0;
@@ -701,14 +731,27 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) {
spin_lock(&inode->i_lock);
- nfs_update_timestamps(inode, attr->ia_valid);
+ if (attr->ia_valid & ATTR_MTIME_SET) {
+ nfs_set_timestamps_to_ts(inode, attr);
+ attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET|
+ ATTR_ATIME|ATTR_ATIME_SET);
+ } else {
+ nfs_update_timestamps(inode, attr->ia_valid);
+ attr->ia_valid &= ~(ATTR_MTIME|ATTR_ATIME);
+ }
spin_unlock(&inode->i_lock);
- attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME);
} else if (nfs_have_delegated_atime(inode) &&
attr->ia_valid & ATTR_ATIME &&
!(attr->ia_valid & ATTR_MTIME)) {
- nfs_update_delegated_atime(inode);
- attr->ia_valid &= ~ATTR_ATIME;
+ if (attr->ia_valid & ATTR_ATIME_SET) {
+ spin_lock(&inode->i_lock);
+ nfs_set_timestamps_to_ts(inode, attr);
+ spin_unlock(&inode->i_lock);
+ attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET);
+ } else {
+ nfs_update_delegated_atime(inode);
+ attr->ia_valid &= ~ATTR_ATIME;
+ }
}
/* Optimization: if the end result is no change, don't RPC */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 6655e5f32ec6..69c2c10ee658 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -455,7 +455,6 @@ extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
#if IS_ENABLED(CONFIG_NFS_LOCALIO)
/* localio.c */
-extern void nfs_local_probe(struct nfs_client *);
extern void nfs_local_probe_async(struct nfs_client *);
extern void nfs_local_probe_async_work(struct work_struct *);
extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *,
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 4ec952f9f47d..510d0a16cfe9 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -171,7 +171,7 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp)
* - called after alloc_client and init_client (so cl_rpcclient exists)
* - this function is idempotent, it can be called for old or new clients
*/
-void nfs_local_probe(struct nfs_client *clp)
+static void nfs_local_probe(struct nfs_client *clp)
{
/* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
if (!localio_enabled ||
@@ -191,14 +191,16 @@ void nfs_local_probe(struct nfs_client *clp)
nfs_localio_enable_client(clp);
nfs_uuid_end(&clp->cl_uuid);
}
-EXPORT_SYMBOL_GPL(nfs_local_probe);
void nfs_local_probe_async_work(struct work_struct *work)
{
struct nfs_client *clp =
container_of(work, struct nfs_client, cl_local_probe_work);
+ if (!refcount_inc_not_zero(&clp->cl_count))
+ return;
nfs_local_probe(clp);
+ nfs_put_client(clp);
}
void nfs_local_probe_async(struct nfs_client *clp)
@@ -207,14 +209,16 @@ void nfs_local_probe_async(struct nfs_client *clp)
}
EXPORT_SYMBOL_GPL(nfs_local_probe_async);
-static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf)
+static inline void nfs_local_file_put(struct nfsd_file *localio)
{
- return nfs_to->nfsd_file_get(nf);
-}
+ /* nfs_to_nfsd_file_put_local() expects an __rcu pointer
+ * but we have a __kernel pointer. It is always safe
+ * to cast a __kernel pointer to an __rcu pointer
+ * because the cast only weakens what is known about the pointer.
+ */
+ struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;
-static inline void nfs_local_file_put(struct nfsd_file *nf)
-{
- nfs_to->nfsd_file_put(nf);
+ nfs_to_nfsd_file_put_local(&nf);
}
/*
@@ -226,12 +230,13 @@ static inline void nfs_local_file_put(struct nfsd_file *nf)
static struct nfsd_file *
__nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ struct nfsd_file __rcu **pnf,
const fmode_t mode)
{
struct nfsd_file *localio;
localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
- cred, fh, nfl, mode);
+ cred, fh, nfl, pnf, mode);
if (IS_ERR(localio)) {
int status = PTR_ERR(localio);
trace_nfs_local_open_fh(fh, mode, status);
@@ -258,7 +263,7 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
struct nfs_fh *fh, struct nfs_file_localio *nfl,
const fmode_t mode)
{
- struct nfsd_file *nf, *new, __rcu **pnf;
+ struct nfsd_file *nf, __rcu **pnf;
if (!nfs_server_is_local(clp))
return NULL;
@@ -270,29 +275,9 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
else
pnf = &nfl->ro_file;
- new = NULL;
- rcu_read_lock();
- nf = rcu_dereference(*pnf);
- if (!nf) {
- rcu_read_unlock();
- new = __nfs_local_open_fh(clp, cred, fh, nfl, mode);
- if (IS_ERR(new))
- return NULL;
- rcu_read_lock();
- /* try to swap in the pointer */
- spin_lock(&clp->cl_uuid.lock);
- nf = rcu_dereference_protected(*pnf, 1);
- if (!nf) {
- nf = new;
- new = NULL;
- rcu_assign_pointer(*pnf, nf);
- }
- spin_unlock(&clp->cl_uuid.lock);
- }
- nf = nfs_local_file_get(nf);
- rcu_read_unlock();
- if (new)
- nfs_to_nfsd_file_put_local(new);
+ nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
+ if (IS_ERR(nf))
+ return NULL;
return nf;
}
EXPORT_SYMBOL_GPL(nfs_local_open_fh);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 973aed9cc5fe..7f1ec9c67ff2 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -195,7 +195,6 @@ struct vfsmount *nfs_d_automount(struct path *path)
if (IS_ERR(mnt))
goto out_fc;
- mntget(mnt); /* prevent immediate expiration */
if (timeout <= 0)
goto out_fc;
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 0282d93c8bcc..aafd15a4afce 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -21,6 +21,7 @@ int nfs42_proc_allocate(struct file *, loff_t, loff_t);
ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t,
struct nl4_server *, nfs4_stateid *, bool);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
+int nfs42_proc_zero_range(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
struct nfs42_layoutstat_data *);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 5cf52ece96ac..01c01f45358b 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -146,7 +146,8 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == -EOPNOTSUPP)
- NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
+ NFS_SERVER(inode)->caps &= ~(NFS_CAP_ALLOCATE |
+ NFS_CAP_ZERO_RANGE);
inode_unlock(inode);
return err;
@@ -169,7 +170,31 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
if (err == 0)
truncate_pagecache_range(inode, offset, (offset + len) -1);
if (err == -EOPNOTSUPP)
- NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
+ NFS_SERVER(inode)->caps &= ~(NFS_CAP_DEALLOCATE |
+ NFS_CAP_ZERO_RANGE);
+
+ inode_unlock(inode);
+ return err;
+}
+
+int nfs42_proc_zero_range(struct file *filep, loff_t offset, loff_t len)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ZERO_RANGE],
+ };
+ struct inode *inode = file_inode(filep);
+ int err;
+
+ if (!nfs_server_capable(inode, NFS_CAP_ZERO_RANGE))
+ return -EOPNOTSUPP;
+
+ inode_lock(inode);
+
+ err = nfs42_proc_fallocate(&msg, filep, offset, len);
+ if (err == 0)
+ truncate_pagecache_range(inode, offset, (offset + len) -1);
+ if (err == -EOPNOTSUPP)
+ NFS_SERVER(inode)->caps &= ~NFS_CAP_ZERO_RANGE;
inode_unlock(inode);
return err;
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index b1b663468249..4cc915d5741d 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -174,6 +174,18 @@
decode_putfh_maxsz + \
decode_deallocate_maxsz + \
decode_getattr_maxsz)
+#define NFS4_enc_zero_range_sz (compound_encode_hdr_maxsz + \
+ encode_sequence_maxsz + \
+ encode_putfh_maxsz + \
+ encode_deallocate_maxsz + \
+ encode_allocate_maxsz + \
+ encode_getattr_maxsz)
+#define NFS4_dec_zero_range_sz (compound_decode_hdr_maxsz + \
+ decode_sequence_maxsz + \
+ decode_putfh_maxsz + \
+ decode_deallocate_maxsz + \
+ decode_allocate_maxsz + \
+ decode_getattr_maxsz)
#define NFS4_enc_read_plus_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -649,6 +661,27 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
}
/*
+ * Encode ZERO_RANGE request
+ */
+static void nfs4_xdr_enc_zero_range(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const void *data)
+{
+ const struct nfs42_falloc_args *args = data;
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->falloc_fh, &hdr);
+ encode_deallocate(xdr, args, &hdr);
+ encode_allocate(xdr, args, &hdr);
+ encode_getfattr(xdr, args->falloc_bitmask, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode READ_PLUS request
*/
static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req,
@@ -1511,6 +1544,37 @@ out:
}
/*
+ * Decode ZERO_RANGE request
+ */
+static int nfs4_xdr_dec_zero_range(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfs42_falloc_res *res = data;
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_deallocate(xdr, res);
+ if (status)
+ goto out;
+ status = decode_allocate(xdr, res);
+ if (status)
+ goto out;
+ decode_getfattr(xdr, res->falloc_fattr, res->falloc_server);
+out:
+ return status;
+}
+
+/*
* Decode READ_PLUS request
*/
static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7d383d29a995..d3ca91f60fc1 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -67,8 +67,7 @@ struct nfs4_minor_version_ops {
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
int (*test_and_free_expired)(struct nfs_server *,
- const nfs4_stateid *,
- const struct cred *);
+ nfs4_stateid *, const struct cred *);
struct nfs_seqid *
(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
void (*session_trunk)(struct rpc_clnt *clnt,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 1cd9652f3c28..5e9d66f3466c 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -225,8 +225,14 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
- if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)))
+ switch (mode) {
+ case 0:
+ case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
+ case FALLOC_FL_ZERO_RANGE:
+ break;
+ default:
return -EOPNOTSUPP;
+ }
ret = inode_newsize_ok(inode, offset + len);
if (ret < 0)
@@ -234,6 +240,8 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
if (mode & FALLOC_FL_PUNCH_HOLE)
return nfs42_proc_deallocate(filep, offset, len);
+ else if (mode & FALLOC_FL_ZERO_RANGE)
+ return nfs42_proc_zero_range(filep, offset ,len);
return nfs42_proc_allocate(filep, offset, len);
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b1d2122bd5a7..341740fa293d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -105,7 +105,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
bool is_privileged);
static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *);
-static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
+static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *,
const struct cred *, bool);
#endif
@@ -325,14 +325,14 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
if (nfs_have_delegated_mtime(inode)) {
if (!(cache_validity & NFS_INO_INVALID_ATIME))
- dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+ dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET);
if (!(cache_validity & NFS_INO_INVALID_MTIME))
- dst[1] &= ~FATTR4_WORD1_TIME_MODIFY;
+ dst[1] &= ~(FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET);
if (!(cache_validity & NFS_INO_INVALID_CTIME))
- dst[1] &= ~FATTR4_WORD1_TIME_METADATA;
+ dst[1] &= ~(FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY_SET);
} else if (nfs_have_delegated_atime(inode)) {
if (!(cache_validity & NFS_INO_INVALID_ATIME))
- dst[1] &= ~FATTR4_WORD1_TIME_ACCESS;
+ dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET);
}
}
@@ -2903,16 +2903,14 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
}
static int nfs40_test_and_free_expired_stateid(struct nfs_server *server,
- const nfs4_stateid *stateid,
- const struct cred *cred)
+ nfs4_stateid *stateid, const struct cred *cred)
{
return -NFS4ERR_BAD_STATEID;
}
#if defined(CONFIG_NFS_V4_1)
static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
- const nfs4_stateid *stateid,
- const struct cred *cred)
+ nfs4_stateid *stateid, const struct cred *cred)
{
int status;
@@ -2921,6 +2919,7 @@ static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
break;
case NFS4_INVALID_STATEID_TYPE:
case NFS4_SPECIAL_STATEID_TYPE:
+ case NFS4_FREED_STATEID_TYPE:
return -NFS4ERR_BAD_STATEID;
case NFS4_REVOKED_STATEID_TYPE:
goto out_free;
@@ -3976,8 +3975,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
FATTR4_WORD0_CASE_INSENSITIVE |
FATTR4_WORD0_CASE_PRESERVING;
if (minorversion)
- bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT |
- FATTR4_WORD2_OPEN_ARGUMENTS;
+ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
+ if (minorversion > 1)
+ bitmask[2] |= FATTR4_WORD2_OPEN_ARGUMENTS;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (status == 0) {
@@ -5164,13 +5164,15 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
}
static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry,
- struct nfs4_createdata *data)
+ struct nfs4_createdata *data, int *statusp)
{
- int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
+ struct dentry *ret;
+
+ *statusp = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
&data->arg.seq_args, &data->res.seq_res, 1);
- if (status)
- return ERR_PTR(status);
+ if (*statusp)
+ return NULL;
spin_lock(&dir->i_lock);
/* Creating a directory bumps nlink in the parent */
@@ -5179,7 +5181,11 @@ static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry,
data->res.fattr->time_start,
NFS_INO_INVALID_DATA);
spin_unlock(&dir->i_lock);
- return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
+ ret = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
+ if (!IS_ERR(ret))
+ return ret;
+ *statusp = PTR_ERR(ret);
+ return NULL;
}
static void nfs4_free_createdata(struct nfs4_createdata *data)
@@ -5240,17 +5246,18 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
struct iattr *sattr,
- struct nfs4_label *label)
+ struct nfs4_label *label, int *statusp)
{
struct nfs4_createdata *data;
- struct dentry *ret = ERR_PTR(-ENOMEM);
+ struct dentry *ret = NULL;
+ *statusp = -ENOMEM;
data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR);
if (data == NULL)
goto out;
data->arg.label = label;
- ret = nfs4_do_mkdir(dir, dentry, data);
+ ret = nfs4_do_mkdir(dir, dentry, data, statusp);
nfs4_free_createdata(data);
out:
@@ -5273,11 +5280,12 @@ static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
sattr->ia_mode &= ~current_umask();
do {
- alias = _nfs4_proc_mkdir(dir, dentry, sattr, label);
- err = PTR_ERR_OR_ZERO(alias);
+ alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err);
trace_nfs4_mkdir(dir, &dentry->d_name, err);
- err = nfs4_handle_exception(NFS_SERVER(dir), err,
- &exception);
+ if (err)
+ alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
+ err,
+ &exception));
} while (exception.retry);
nfs4_label_release_security(label);
@@ -6211,6 +6219,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen,
struct nfs_server *server = NFS_SERVER(inode);
int ret;
+ if (unlikely(NFS_FH(inode)->size == 0))
+ return -ENODATA;
if (!nfs4_server_supports_acls(server, type))
return -EOPNOTSUPP;
ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
@@ -6285,6 +6295,9 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf,
{
struct nfs4_exception exception = { };
int err;
+
+ if (unlikely(NFS_FH(inode)->size == 0))
+ return -ENODATA;
do {
err = __nfs4_proc_set_acl(inode, buf, buflen, type);
trace_nfs4_set_acl(inode, err);
@@ -10611,7 +10624,7 @@ static const struct rpc_call_ops nfs41_free_stateid_ops = {
* Note: this function is always asynchronous.
*/
static int nfs41_free_stateid(struct nfs_server *server,
- const nfs4_stateid *stateid,
+ nfs4_stateid *stateid,
const struct cred *cred,
bool privileged)
{
@@ -10651,6 +10664,7 @@ static int nfs41_free_stateid(struct nfs_server *server,
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
+ stateid->type = NFS4_FREED_STATEID_TYPE;
return 0;
}
@@ -10817,6 +10831,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_OFFLOAD_CANCEL
| NFS_CAP_COPY_NOTIFY
| NFS_CAP_DEALLOCATE
+ | NFS_CAP_ZERO_RANGE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
| NFS_CAP_CLONE
@@ -10852,7 +10867,7 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
- ssize_t error, error2, error3;
+ ssize_t error, error2, error3, error4;
size_t left = size;
error = generic_listxattr(dentry, list, left);
@@ -10875,8 +10890,16 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left);
if (error3 < 0)
return error3;
+ if (list) {
+ list += error3;
+ left -= error3;
+ }
+
+ error4 = security_inode_listsecurity(d_inode(dentry), list, left);
+ if (error4 < 0)
+ return error4;
- error += error2 + error3;
+ error += error2 + error3 + error4;
if (size && error > size)
return -ERANGE;
return error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 55bef5fbfa47..318afde38057 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7711,6 +7711,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
PROC42(LISTXATTRS, enc_listxattrs, dec_listxattrs),
PROC42(REMOVEXATTR, enc_removexattr, dec_removexattr),
PROC42(READ_PLUS, enc_read_plus, dec_read_plus),
+ PROC42(ZERO_RANGE, enc_zero_range, dec_zero_range),
};
static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 91ef486f40b9..b4ccdf78d4dd 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -830,10 +830,16 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
.servername = clp->cl_hostname,
.connect_timeout = connect_timeout,
.reconnect_timeout = connect_timeout,
+ .xprtsec = clp->cl_xprtsec,
};
- if (da->da_transport != clp->cl_proto)
+ if (da->da_transport != clp->cl_proto &&
+ clp->cl_proto != XPRT_TRANSPORT_TCP_TLS)
continue;
+ if (da->da_transport == XPRT_TRANSPORT_TCP &&
+ mds_srv->nfs_client->cl_proto == XPRT_TRANSPORT_TCP_TLS)
+ xprt_args.ident = XPRT_TRANSPORT_TCP_TLS;
+
if (da->da_addr.ss_family != clp->cl_addr.ss_family)
continue;
/* Add this address as an alias */
@@ -841,6 +847,9 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
rpc_clnt_test_and_add_xprt, NULL);
continue;
}
+ if (da->da_transport == XPRT_TRANSPORT_TCP &&
+ mds_srv->nfs_client->cl_proto == XPRT_TRANSPORT_TCP_TLS)
+ da->da_transport = XPRT_TRANSPORT_TCP_TLS;
clp = get_v3_ds_connect(mds_srv,
&da->da_addr,
da->da_addrlen, da->da_transport,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 81bd1b9aba17..3c1fa320b3f1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -56,7 +56,8 @@ static int nfs_return_empty_folio(struct folio *folio)
{
folio_zero_segment(folio, 0, folio_size(folio));
folio_mark_uptodate(folio);
- folio_unlock(folio);
+ if (nfs_netfs_folio_unlock(folio))
+ folio_unlock(folio);
return 0;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9eea9e62afc9..91b5503b6f74 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1052,6 +1052,16 @@ int nfs_reconfigure(struct fs_context *fc)
sync_filesystem(sb);
/*
+ * The SB_RDONLY flag has been removed from the superblock during
+ * mounts to prevent interference between different filesystems.
+ * Similarly, it is also necessary to ignore the SB_RDONLY flag
+ * during reconfiguration; otherwise, it may also result in the
+ * creation of redundant superblocks when mounting a directory with
+ * different rw and ro flags multiple times.
+ */
+ fc->sb_flags_mask &= ~SB_RDONLY;
+
+ /*
* Userspace mount programs that send binary options generally send
* them populated with default values. We have no way to know which
* ones were explicitly specified. Fall back to legacy behavior and
@@ -1308,8 +1318,17 @@ int nfs_get_tree_common(struct fs_context *fc)
if (IS_ERR(server))
return PTR_ERR(server);
+ /*
+ * When NFS_MOUNT_UNSHARED is not set, NFS forces the sharing of a
+ * superblock among each filesystem that mounts sub-directories
+ * belonging to a single exported root path.
+ * To prevent interference between different filesystems, the
+ * SB_RDONLY flag should be removed from the superblock.
+ */
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
+ else
+ fc->sb_flags &= ~SB_RDONLY;
/* -o noac implies -o sync */
if (server->flags & NFS_MOUNT_NOAC)
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 37cb2b776435..545148d42dcc 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -387,6 +387,33 @@ static inline void nfs_sysfs_add_nfsv41_server(struct nfs_server *server)
}
#endif /* CONFIG_NFS_V4_1 */
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+
+static ssize_t
+localio_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct nfs_server *server = container_of(kobj, struct nfs_server, kobj);
+ bool localio = nfs_server_is_local(server->nfs_client);
+ return sysfs_emit(buf, "%d\n", localio);
+}
+
+static struct kobj_attribute nfs_sysfs_attr_localio = __ATTR_RO(localio);
+
+static void nfs_sysfs_add_nfs_localio_server(struct nfs_server *server)
+{
+ int ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_localio.attr,
+ nfs_netns_server_namespace(&server->kobj));
+ if (ret < 0)
+ pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n",
+ server->s_sysfs_id, ret);
+}
+#else
+static inline void nfs_sysfs_add_nfs_localio_server(struct nfs_server *server)
+{
+}
+#endif /* IS_ENABLED(CONFIG_NFS_LOCALIO) */
+
void nfs_sysfs_add_server(struct nfs_server *server)
{
int ret;
@@ -405,6 +432,7 @@ void nfs_sysfs_add_server(struct nfs_server *server)
server->s_sysfs_id, ret);
nfs_sysfs_add_nfsv41_server(server);
+ nfs_sysfs_add_nfs_localio_server(server);
}
EXPORT_SYMBOL_GPL(nfs_sysfs_add_server);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 23df8b214474..374fc6b34c79 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -632,19 +632,19 @@ static void nfs_write_error(struct nfs_page *req, int error)
* Find an associated nfs write request, and prepare to flush it out
* May return an error if the user signalled nfs_wait_on_request().
*/
-static int nfs_page_async_flush(struct folio *folio,
- struct writeback_control *wbc,
- struct nfs_pageio_descriptor *pgio)
+static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc,
+ struct nfs_pageio_descriptor *pgio)
{
struct nfs_page *req;
- int ret = 0;
+ int ret;
+
+ nfs_pageio_cond_complete(pgio, folio->index);
req = nfs_lock_and_join_requests(folio);
if (!req)
- goto out;
- ret = PTR_ERR(req);
+ return 0;
if (IS_ERR(req))
- goto out;
+ return PTR_ERR(req);
nfs_folio_set_writeback(folio);
WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
@@ -654,7 +654,6 @@ static int nfs_page_async_flush(struct folio *folio,
if (nfs_error_is_fatal_on_server(ret))
goto out_launder;
- ret = 0;
if (!nfs_pageio_add_request(pgio, req)) {
ret = pgio->pg_error;
/*
@@ -662,28 +661,20 @@ static int nfs_page_async_flush(struct folio *folio,
*/
if (nfs_error_is_fatal_on_server(ret))
goto out_launder;
- if (wbc->sync_mode == WB_SYNC_NONE)
- ret = AOP_WRITEPAGE_ACTIVATE;
folio_redirty_for_writepage(wbc, folio);
nfs_redirty_request(req);
pgio->pg_error = 0;
- } else
- nfs_add_stats(folio->mapping->host,
- NFSIOS_WRITEPAGES, 1);
-out:
- return ret;
+ return ret;
+ }
+
+ nfs_add_stats(folio->mapping->host, NFSIOS_WRITEPAGES, 1);
+ return 0;
+
out_launder:
nfs_write_error(req, ret);
return 0;
}
-static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc,
- struct nfs_pageio_descriptor *pgio)
-{
- nfs_pageio_cond_complete(pgio, folio->index);
- return nfs_page_async_flush(folio, wbc, pgio);
-}
-
/*
* Write an mmapped page to the server.
*/
@@ -703,17 +694,6 @@ static int nfs_writepage_locked(struct folio *folio,
return err;
}
-static int nfs_writepages_callback(struct folio *folio,
- struct writeback_control *wbc, void *data)
-{
- int ret;
-
- ret = nfs_do_writepage(folio, wbc, data);
- if (ret != AOP_WRITEPAGE_ACTIVATE)
- folio_unlock(folio);
- return ret;
-}
-
static void nfs_io_completion_commit(void *inode)
{
nfs_commit_inode(inode, 0);
@@ -749,11 +729,15 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
}
do {
+ struct folio *folio = NULL;
+
nfs_pageio_init_write(&pgio, inode, priority, false,
&nfs_async_write_completion_ops);
pgio.pg_io_completion = ioc;
- err = write_cache_pages(mapping, wbc, nfs_writepages_callback,
- &pgio);
+ while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
+ err = nfs_do_writepage(folio, wbc, &pgio);
+ folio_unlock(folio);
+ }
pgio.pg_error = 0;
nfs_pageio_complete(&pgio);
if (err == -EAGAIN && mntflags & NFS_MOUNT_SOFTERR)
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 6a0bdea6d644..05c7c16e37ab 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -151,8 +151,7 @@ EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
*/
static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
{
- LIST_HEAD(local_files);
- struct nfs_file_localio *nfl, *tmp;
+ struct nfs_file_localio *nfl;
spin_lock(&nfs_uuid->lock);
if (unlikely(!rcu_access_pointer(nfs_uuid->net))) {
@@ -166,17 +165,42 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
nfs_uuid->dom = NULL;
}
- list_splice_init(&nfs_uuid->files, &local_files);
- spin_unlock(&nfs_uuid->lock);
-
/* Walk list of files and ensure their last references dropped */
- list_for_each_entry_safe(nfl, tmp, &local_files, list) {
- nfs_close_local_fh(nfl);
+
+ while ((nfl = list_first_entry_or_null(&nfs_uuid->files,
+ struct nfs_file_localio,
+ list)) != NULL) {
+ /* If nfs_uuid is already NULL, nfs_close_local_fh is
+ * closing and we must wait, else we unlink and close.
+ */
+ if (rcu_access_pointer(nfl->nfs_uuid) == NULL) {
+ /* nfs_close_local_fh() is doing the
+ * close and we must wait. until it unlinks
+ */
+ wait_var_event_spinlock(nfl,
+ list_first_entry_or_null(
+ &nfs_uuid->files,
+ struct nfs_file_localio,
+ list) != nfl,
+ &nfs_uuid->lock);
+ continue;
+ }
+
+ /* Remove nfl from nfs_uuid->files list */
+ list_del_init(&nfl->list);
+ spin_unlock(&nfs_uuid->lock);
+
+ nfs_to_nfsd_file_put_local(&nfl->ro_file);
+ nfs_to_nfsd_file_put_local(&nfl->rw_file);
cond_resched();
- }
- spin_lock(&nfs_uuid->lock);
- BUG_ON(!list_empty(&nfs_uuid->files));
+ spin_lock(&nfs_uuid->lock);
+ /* Now we can allow racing nfs_close_local_fh() to
+ * skip the locking.
+ */
+ RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
+ wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
+ }
/* Remove client from nn->local_clients */
if (nfs_uuid->list_lock) {
@@ -237,6 +261,7 @@ static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl
struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
struct rpc_clnt *rpc_clnt, const struct cred *cred,
const struct nfs_fh *nfs_fh, struct nfs_file_localio *nfl,
+ struct nfsd_file __rcu **pnf,
const fmode_t fmode)
{
struct net *net;
@@ -261,10 +286,9 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
rcu_read_unlock();
/* We have an implied reference to net thanks to nfsd_net_try_get */
localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt,
- cred, nfs_fh, fmode);
- if (IS_ERR(localio))
- nfs_to_nfsd_net_put(net);
- else
+ cred, nfs_fh, pnf, fmode);
+ nfs_to_nfsd_net_put(net);
+ if (!IS_ERR(localio))
nfs_uuid_add_file(uuid, nfl);
return localio;
@@ -273,8 +297,6 @@ EXPORT_SYMBOL_GPL(nfs_open_local_fh);
void nfs_close_local_fh(struct nfs_file_localio *nfl)
{
- struct nfsd_file *ro_nf = NULL;
- struct nfsd_file *rw_nf = NULL;
nfs_uuid_t *nfs_uuid;
rcu_read_lock();
@@ -285,28 +307,39 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl)
return;
}
- ro_nf = rcu_access_pointer(nfl->ro_file);
- rw_nf = rcu_access_pointer(nfl->rw_file);
- if (ro_nf || rw_nf) {
- spin_lock(&nfs_uuid->lock);
- if (ro_nf)
- ro_nf = rcu_dereference_protected(xchg(&nfl->ro_file, NULL), 1);
- if (rw_nf)
- rw_nf = rcu_dereference_protected(xchg(&nfl->rw_file, NULL), 1);
-
- /* Remove nfl from nfs_uuid->files list */
- RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
- list_del_init(&nfl->list);
+ spin_lock(&nfs_uuid->lock);
+ if (!rcu_access_pointer(nfl->nfs_uuid)) {
+ /* nfs_uuid_put has finished here */
spin_unlock(&nfs_uuid->lock);
rcu_read_unlock();
-
- if (ro_nf)
- nfs_to_nfsd_file_put_local(ro_nf);
- if (rw_nf)
- nfs_to_nfsd_file_put_local(rw_nf);
return;
}
+ if (list_empty(&nfs_uuid->files)) {
+ /* nfs_uuid_put() has started closing files, wait for it
+ * to finished
+ */
+ spin_unlock(&nfs_uuid->lock);
+ rcu_read_unlock();
+ wait_var_event(&nfl->nfs_uuid,
+ rcu_access_pointer(nfl->nfs_uuid) == NULL);
+ return;
+ }
+ /* tell nfs_uuid_put() to wait for us */
+ RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
+ spin_unlock(&nfs_uuid->lock);
rcu_read_unlock();
+
+ nfs_to_nfsd_file_put_local(&nfl->ro_file);
+ nfs_to_nfsd_file_put_local(&nfl->rw_file);
+
+ /* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put()
+ * that we are done. The moment we drop the spinlock the
+ * nfs_uuid could be freed.
+ */
+ spin_lock(&nfs_uuid->lock);
+ list_del_init(&nfl->list);
+ wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
+ spin_unlock(&nfs_uuid->lock);
}
EXPORT_SYMBOL_GPL(nfs_close_local_fh);
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index ab85e6a2454f..e108b6c705b4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -378,15 +378,41 @@ nfsd_file_put(struct nfsd_file *nf)
* the reference of the nfsd_file.
*/
struct net *
-nfsd_file_put_local(struct nfsd_file *nf)
+nfsd_file_put_local(struct nfsd_file __rcu **pnf)
{
- struct net *net = nf->nf_net;
+ struct nfsd_file *nf;
+ struct net *net = NULL;
- nfsd_file_put(nf);
+ nf = unrcu_pointer(xchg(pnf, NULL));
+ if (nf) {
+ net = nf->nf_net;
+ nfsd_file_put(nf);
+ }
return net;
}
/**
+ * nfsd_file_get_local - get nfsd_file reference and reference to net
+ * @nf: nfsd_file of which to put the reference
+ *
+ * Get reference to both the nfsd_file and nf->nf_net.
+ */
+struct nfsd_file *
+nfsd_file_get_local(struct nfsd_file *nf)
+{
+ struct net *net = nf->nf_net;
+
+ if (nfsd_net_try_get(net)) {
+ nf = nfsd_file_get(nf);
+ if (!nf)
+ nfsd_net_put(net);
+ } else {
+ nf = NULL;
+ }
+ return nf;
+}
+
+/**
* nfsd_file_file - get the backing file of an nfsd_file
* @nf: nfsd_file of which to access the backing file.
*
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 5865f9c72712..722b26c71e45 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -62,7 +62,8 @@ void nfsd_file_cache_shutdown(void);
int nfsd_file_cache_start_net(struct net *net);
void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
-struct net *nfsd_file_put_local(struct nfsd_file *nf);
+struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf);
+struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
struct file *nfsd_file_file(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index 238647fa379e..80d9ff6608a7 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -24,21 +24,6 @@
#include "filecache.h"
#include "cache.h"
-static const struct nfsd_localio_operations nfsd_localio_ops = {
- .nfsd_net_try_get = nfsd_net_try_get,
- .nfsd_net_put = nfsd_net_put,
- .nfsd_open_local_fh = nfsd_open_local_fh,
- .nfsd_file_put_local = nfsd_file_put_local,
- .nfsd_file_get = nfsd_file_get,
- .nfsd_file_put = nfsd_file_put,
- .nfsd_file_file = nfsd_file_file,
-};
-
-void nfsd_localio_ops_init(void)
-{
- nfs_to = &nfsd_localio_ops;
-}
-
/**
* nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file
*
@@ -47,6 +32,7 @@ void nfsd_localio_ops_init(void)
* @rpc_clnt: rpc_clnt that the client established
* @cred: cred that the client established
* @nfs_fh: filehandle to lookup
+ * @nfp: place to find the nfsd_file, or store it if it was non-NULL
* @fmode: fmode_t to use for open
*
* This function maps a local fh to a path on a local filesystem.
@@ -57,10 +43,11 @@ void nfsd_localio_ops_init(void)
* set. Caller (NFS client) is responsible for calling nfsd_net_put and
* nfsd_file_put (via nfs_to_nfsd_file_put_local).
*/
-struct nfsd_file *
+static struct nfsd_file *
nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
struct rpc_clnt *rpc_clnt, const struct cred *cred,
- const struct nfs_fh *nfs_fh, const fmode_t fmode)
+ const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf,
+ const fmode_t fmode)
{
int mayflags = NFSD_MAY_LOCALIO;
struct svc_cred rq_cred;
@@ -71,6 +58,15 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
if (nfs_fh->size > NFS4_FHSIZE)
return ERR_PTR(-EINVAL);
+ if (!nfsd_net_try_get(net))
+ return ERR_PTR(-ENXIO);
+
+ rcu_read_lock();
+ localio = nfsd_file_get(rcu_dereference(*pnf));
+ rcu_read_unlock();
+ if (localio)
+ return localio;
+
/* nfs_fh -> svc_fh */
fh_init(&fh, NFS4_FHSIZE);
fh.fh_handle.fh_size = nfs_fh->size;
@@ -92,9 +88,47 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
if (rq_cred.cr_group_info)
put_group_info(rq_cred.cr_group_info);
+ if (!IS_ERR(localio)) {
+ struct nfsd_file *new;
+ if (!nfsd_net_try_get(net)) {
+ nfsd_file_put(localio);
+ nfsd_net_put(net);
+ return ERR_PTR(-ENXIO);
+ }
+ nfsd_file_get(localio);
+ again:
+ new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio)));
+ if (new) {
+ /* Some other thread installed an nfsd_file */
+ if (nfsd_file_get(new) == NULL)
+ goto again;
+ /*
+ * Drop the ref we were going to install and the
+ * one we were going to return.
+ */
+ nfsd_file_put(localio);
+ nfsd_file_put(localio);
+ localio = new;
+ }
+ } else
+ nfsd_net_put(net);
+
return localio;
}
-EXPORT_SYMBOL_GPL(nfsd_open_local_fh);
+
+static const struct nfsd_localio_operations nfsd_localio_ops = {
+ .nfsd_net_try_get = nfsd_net_try_get,
+ .nfsd_net_put = nfsd_net_put,
+ .nfsd_open_local_fh = nfsd_open_local_fh,
+ .nfsd_file_put_local = nfsd_file_put_local,
+ .nfsd_file_get_local = nfsd_file_get_local,
+ .nfsd_file_file = nfsd_file_file,
+};
+
+void nfsd_localio_ops_init(void)
+{
+ nfs_to = &nfsd_localio_ops;
+}
/*
* UUID_IS_LOCAL XDR functions
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 0d8f7fb15c2e..dd0c8e560ef6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -2102,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree,
ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
if (ret < 0) {
- if (unlikely(ret == -ENOENT))
+ if (unlikely(ret == -ENOENT)) {
nilfs_crit(btree->b_inode->i_sb,
"writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
btree->b_inode->i_ino,
(unsigned long long)key, level);
+ ret = -EINVAL;
+ }
goto out;
}
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 893ab36824cc..2d8dc6b35b54 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
dat = nilfs_bmap_get_dat(bmap);
key = nilfs_bmap_data_get_key(bmap, bh);
ptr = nilfs_direct_get_ptr(bmap, key);
+ if (ptr == NILFS_BMAP_INVALID_PTR)
+ return -EINVAL;
+
if (!buffer_nilfs_volatile(bh)) {
oldreq.pr_entry_nr = ptr;
newreq.pr_entry_nr = ptr;
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 2f850a18d6e7..946b0d3534a5 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -422,8 +422,6 @@ static int nilfs_mdt_write_folio(struct folio *folio,
if (wbc->sync_mode == WB_SYNC_ALL)
err = nilfs_construct_segment(sb);
- else if (wbc->for_reclaim)
- nilfs_flush_segment(sb, inode->i_ino);
return err;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 83970d97840b..61a4141f8d6b 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2221,22 +2221,6 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
spin_unlock(&sci->sc_state_lock);
}
-/**
- * nilfs_flush_segment - trigger a segment construction for resource control
- * @sb: super block
- * @ino: inode number of the file to be flushed out.
- */
-void nilfs_flush_segment(struct super_block *sb, ino_t ino)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
- struct nilfs_sc_info *sci = nilfs->ns_writer;
-
- if (!sci || nilfs_doing_construction())
- return;
- nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
- /* assign bit 0 to data files */
-}
-
struct nilfs_segctor_wait_request {
wait_queue_entry_t wq;
__u32 seq;
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index f723f47ddc4e..4b39ed43ae72 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -226,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *);
extern int nilfs_construct_segment(struct super_block *);
extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
loff_t, loff_t);
-extern void nilfs_flush_segment(struct super_block *, ino_t);
extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
void **);
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 34ed242e1063..1e99a35691cd 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -913,7 +913,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
struct ntfs_inode *ni = ntfs_i(inode);
u64 valid = ni->i_valid;
struct ntfs_sb_info *sbi = ni->mi.sbi;
- struct page *page, **pages = NULL;
+ struct page **pages = NULL;
+ struct folio *folio;
size_t written = 0;
u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
u32 frame_size = 1u << frame_bits;
@@ -923,7 +924,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
u64 frame_vbo;
pgoff_t index;
bool frame_uptodate;
- struct folio *folio;
if (frame_size < PAGE_SIZE) {
/*
@@ -977,8 +977,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
pages_per_frame);
if (err) {
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_unlock(folio);
folio_put(folio);
}
@@ -989,10 +988,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ip = off >> PAGE_SHIFT;
off = offset_in_page(valid);
for (; ip < pages_per_frame; ip++, off = 0) {
- page = pages[ip];
- folio = page_folio(page);
- zero_user_segment(page, off, PAGE_SIZE);
- flush_dcache_page(page);
+ folio = page_folio(pages[ip]);
+ folio_zero_segment(folio, off, PAGE_SIZE);
+ flush_dcache_folio(folio);
folio_mark_uptodate(folio);
}
@@ -1001,8 +999,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ni_unlock(ni);
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_mark_uptodate(folio);
folio_unlock(folio);
folio_put(folio);
@@ -1046,8 +1043,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
if (err) {
for (ip = 0; ip < pages_per_frame;
ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_unlock(folio);
folio_put(folio);
}
@@ -1065,10 +1061,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
for (;;) {
size_t cp, tail = PAGE_SIZE - off;
- page = pages[ip];
- cp = copy_page_from_iter_atomic(page, off,
+ folio = page_folio(pages[ip]);
+ cp = copy_folio_from_iter_atomic(folio, off,
min(tail, bytes), from);
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
copied += cp;
bytes -= cp;
@@ -1088,9 +1084,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ni_unlock(ni);
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- ClearPageDirty(page);
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
+ folio_clear_dirty(folio);
folio_mark_uptodate(folio);
folio_unlock(folio);
folio_put(folio);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index fce9beb214f0..43e652a2adaf 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1483,7 +1483,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
sc_put(sc);
}
-/* socket shutdown does a del_timer_sync against this as it tears down.
+/* socket shutdown does a timer_delete_sync against this as it tears down.
* we can't start this timer until we've got to the point in sc buildup
* where shutdown is going to be involved */
static void o2net_idle_timer(struct timer_list *t)
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 1ad7106741f8..3ad7baf67658 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -505,5 +505,5 @@ static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
ocfs2_filecheck_handle_entry(ent, entry);
exit:
- return (!ret ? count : ret);
+ return ret ?: count;
}
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index e272429da3db..de7f12858729 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -674,7 +674,7 @@ out_put:
break;
}
out:
- kfree(rec);
+ ocfs2_free_quota_recovery(rec);
return status;
}
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index ddd761cf44c8..a28c127b9934 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -691,8 +691,7 @@ static void __exit ocfs2_stack_glue_exit(void)
memset(&locking_max_version, 0,
sizeof(struct ocfs2_protocol_version));
ocfs2_sysfs_exit();
- if (ocfs2_table_header)
- unregister_sysctl_table(ocfs2_table_header);
+ unregister_sysctl_table(ocfs2_table_header);
}
MODULE_AUTHOR("Oracle");
diff --git a/fs/pipe.c b/fs/pipe.c
index da45edd68c41..45077c37bad1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,6 +26,7 @@
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>
#include <linux/sysctl.h>
+#include <linux/sort.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
@@ -76,8 +77,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
*/
-#define cmp_int(l, r) ((l > r) - (l < r))
-
#ifdef CONFIG_PROVE_LOCKING
static int pipe_lock_cmp_fn(const struct lockdep_map *a,
const struct lockdep_map *b)
diff --git a/fs/pnode.c b/fs/pnode.c
index fb77427df39e..ffd429b760d5 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -231,8 +231,8 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
/* skip if mountpoint isn't visible in m */
if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
return 0;
- /* skip if m is in the anon_ns we are emptying */
- if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING)
+ /* skip if m is in the anon_ns */
+ if (is_anon_ns(m->mnt_ns))
return 0;
if (peers(m, last_dest)) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fe33a5843fbd..c667702dc69b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -827,7 +827,13 @@ static const struct file_operations proc_single_file_operations = {
.release = single_release,
};
-
+/*
+ * proc_mem_open() can return errno, NULL or mm_struct*.
+ *
+ * - Returns NULL if the task has no mm (PF_KTHREAD or PF_EXITING)
+ * - Returns mm_struct* on success
+ * - Returns error code on failure
+ */
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
{
struct task_struct *task = get_proc_task(inode);
@@ -854,8 +860,8 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
{
struct mm_struct *mm = proc_mem_open(inode, mode);
- if (IS_ERR(mm))
- return PTR_ERR(mm);
+ if (IS_ERR_OR_NULL(mm))
+ return mm ? PTR_ERR(mm) : -ESRCH;
file->private_data = mm;
return 0;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 23fc771100ae..999af26c7298 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -22,6 +22,12 @@
#define KPMMASK (KPMSIZE - 1)
#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
+enum kpage_operation {
+ KPAGE_FLAGS,
+ KPAGE_COUNT,
+ KPAGE_CGROUP,
+};
+
static inline unsigned long get_max_dump_pfn(void)
{
#ifdef CONFIG_SPARSEMEM
@@ -37,19 +43,17 @@ static inline unsigned long get_max_dump_pfn(void)
#endif
}
-/* /proc/kpagecount - an array exposing page mapcounts
- *
- * Each entry is a u64 representing the corresponding
- * physical page mapcount.
- */
-static ssize_t kpagecount_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t kpage_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos,
+ enum kpage_operation op)
{
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
+ struct page *page;
unsigned long src = *ppos;
unsigned long pfn;
ssize_t ret = 0;
+ u64 info;
pfn = src / KPMSIZE;
if (src & KPMMASK || count & KPMMASK)
@@ -59,24 +63,34 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
while (count > 0) {
- struct page *page;
- u64 mapcount = 0;
-
/*
* TODO: ZONE_DEVICE support requires to identify
* memmaps that were actually initialized.
*/
page = pfn_to_online_page(pfn);
- if (page) {
- struct folio *folio = page_folio(page);
- if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
- mapcount = folio_precise_page_mapcount(folio, page);
- else
- mapcount = folio_average_page_mapcount(folio);
- }
-
- if (put_user(mapcount, out)) {
+ if (page) {
+ switch (op) {
+ case KPAGE_FLAGS:
+ info = stable_page_flags(page);
+ break;
+ case KPAGE_COUNT:
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
+ info = folio_precise_page_mapcount(page_folio(page), page);
+ else
+ info = folio_average_page_mapcount(page_folio(page));
+ break;
+ case KPAGE_CGROUP:
+ info = page_cgroup_ino(page);
+ break;
+ default:
+ info = 0;
+ break;
+ }
+ } else
+ info = 0;
+
+ if (put_user(info, out)) {
ret = -EFAULT;
break;
}
@@ -94,17 +108,23 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
return ret;
}
+/* /proc/kpagecount - an array exposing page mapcounts
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page mapcount.
+ */
+static ssize_t kpagecount_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return kpage_read(file, buf, count, ppos, KPAGE_COUNT);
+}
+
static const struct proc_ops kpagecount_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_lseek = mem_lseek,
.proc_read = kpagecount_read,
};
-/* /proc/kpageflags - an array exposing page flags
- *
- * Each entry is a u64 representing the corresponding
- * physical page flags.
- */
static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
@@ -225,47 +245,17 @@ u64 stable_page_flags(const struct page *page)
#endif
return u;
-};
+}
+/* /proc/kpageflags - an array exposing page flags
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page flags.
+ */
static ssize_t kpageflags_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
- const unsigned long max_dump_pfn = get_max_dump_pfn();
- u64 __user *out = (u64 __user *)buf;
- unsigned long src = *ppos;
- unsigned long pfn;
- ssize_t ret = 0;
-
- pfn = src / KPMSIZE;
- if (src & KPMMASK || count & KPMMASK)
- return -EINVAL;
- if (src >= max_dump_pfn * KPMSIZE)
- return 0;
- count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
-
- while (count > 0) {
- /*
- * TODO: ZONE_DEVICE support requires to identify
- * memmaps that were actually initialized.
- */
- struct page *page = pfn_to_online_page(pfn);
-
- if (put_user(stable_page_flags(page), out)) {
- ret = -EFAULT;
- break;
- }
-
- pfn++;
- out++;
- count -= KPMSIZE;
-
- cond_resched();
- }
-
- *ppos += (char __user *)out - buf;
- if (!ret)
- ret = (char __user *)out - buf;
- return ret;
+ return kpage_read(file, buf, count, ppos, KPAGE_FLAGS);
}
static const struct proc_ops kpageflags_proc_ops = {
@@ -276,53 +266,10 @@ static const struct proc_ops kpageflags_proc_ops = {
#ifdef CONFIG_MEMCG
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
- const unsigned long max_dump_pfn = get_max_dump_pfn();
- u64 __user *out = (u64 __user *)buf;
- struct page *ppage;
- unsigned long src = *ppos;
- unsigned long pfn;
- ssize_t ret = 0;
- u64 ino;
-
- pfn = src / KPMSIZE;
- if (src & KPMMASK || count & KPMMASK)
- return -EINVAL;
- if (src >= max_dump_pfn * KPMSIZE)
- return 0;
- count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
-
- while (count > 0) {
- /*
- * TODO: ZONE_DEVICE support requires to identify
- * memmaps that were actually initialized.
- */
- ppage = pfn_to_online_page(pfn);
-
- if (ppage)
- ino = page_cgroup_ino(ppage);
- else
- ino = 0;
-
- if (put_user(ino, out)) {
- ret = -EFAULT;
- break;
- }
-
- pfn++;
- out++;
- count -= KPMSIZE;
-
- cond_resched();
- }
-
- *ppos += (char __user *)out - buf;
- if (!ret)
- ret = (char __user *)out - buf;
- return ret;
+ return kpage_read(file, buf, count, ppos, KPAGE_CGROUP);
}
-
static const struct proc_ops kpagecgroup_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_lseek = mem_lseek,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 994cde10e3f4..27972c0749e7 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -212,8 +212,8 @@ static int proc_maps_open(struct inode *inode, struct file *file,
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(priv->mm)) {
- int err = PTR_ERR(priv->mm);
+ if (IS_ERR_OR_NULL(priv->mm)) {
+ int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
seq_release_private(inode, file);
return err;
@@ -1325,8 +1325,8 @@ static int smaps_rollup_open(struct inode *inode, struct file *file)
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(priv->mm)) {
- ret = PTR_ERR(priv->mm);
+ if (IS_ERR_OR_NULL(priv->mm)) {
+ ret = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
single_release(inode, file);
goto out_free;
@@ -2069,8 +2069,8 @@ static int pagemap_open(struct inode *inode, struct file *file)
struct mm_struct *mm;
mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(mm))
- return PTR_ERR(mm);
+ if (IS_ERR_OR_NULL(mm))
+ return mm ? PTR_ERR(mm) : -ESRCH;
file->private_data = mm;
return 0;
}
@@ -2087,7 +2087,8 @@ static int pagemap_release(struct inode *inode, struct file *file)
#define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
PAGE_IS_FILE | PAGE_IS_PRESENT | \
PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
- PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY)
+ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY | \
+ PAGE_IS_GUARD)
#define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
struct pagemap_scan_private {
@@ -2128,12 +2129,14 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
- if (p->masks_of_interest & PAGE_IS_FILE) {
- swp = pte_to_swp_entry(pte);
- if (is_pfn_swap_entry(swp) &&
- !folio_test_anon(pfn_swap_entry_folio(swp)))
- categories |= PAGE_IS_FILE;
- }
+ swp = pte_to_swp_entry(pte);
+ if (is_guard_swp_entry(swp))
+ categories |= PAGE_IS_GUARD;
+ else if ((p->masks_of_interest & PAGE_IS_FILE) &&
+ is_pfn_swap_entry(swp) &&
+ !folio_test_anon(pfn_swap_entry_folio(swp)))
+ categories |= PAGE_IS_FILE;
+
if (pte_swp_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index bce674533000..59bfd61d653a 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -260,8 +260,8 @@ static int maps_open(struct inode *inode, struct file *file,
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(priv->mm)) {
- int err = PTR_ERR(priv->mm);
+ if (IS_ERR_OR_NULL(priv->mm)) {
+ int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
seq_release_private(inode, file);
return err;
diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 89d2dbbb742c..5200a0f3cafc 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -155,6 +155,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
struct cached_fids *cfids;
const char *npath;
int retries = 0, cur_sleep = 1;
+ __le32 lease_flags = 0;
if (cifs_sb->root == NULL)
return -ENOENT;
@@ -201,6 +202,8 @@ replay_again:
}
spin_unlock(&cfids->cfid_list_lock);
+ pfid = &cfid->fid;
+
/*
* Skip any prefix paths in @path as lookup_noperm_positive_unlocked() ends up
* calling ->lookup() which already adds those through
@@ -222,6 +225,25 @@ replay_again:
rc = -ENOENT;
goto out;
}
+ if (dentry->d_parent && server->dialect >= SMB30_PROT_ID) {
+ struct cached_fid *parent_cfid;
+
+ spin_lock(&cfids->cfid_list_lock);
+ list_for_each_entry(parent_cfid, &cfids->entries, entry) {
+ if (parent_cfid->dentry == dentry->d_parent) {
+ cifs_dbg(FYI, "found a parent cached file handle\n");
+ if (parent_cfid->has_lease && parent_cfid->time) {
+ lease_flags
+ |= SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE;
+ memcpy(pfid->parent_lease_key,
+ parent_cfid->fid.lease_key,
+ SMB2_LEASE_KEY_SIZE);
+ }
+ break;
+ }
+ }
+ spin_unlock(&cfids->cfid_list_lock);
+ }
}
cfid->dentry = dentry;
cfid->tcon = tcon;
@@ -236,7 +258,6 @@ replay_again:
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
- pfid = &cfid->fid;
server->ops->new_lease_key(pfid);
memset(rqst, 0, sizeof(rqst));
@@ -256,6 +277,7 @@ replay_again:
FILE_READ_EA,
.disposition = FILE_OPEN,
.fid = pfid,
+ .lease_flags = lease_flags,
.replay = !!(retries),
};
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index fb04e263611c..0a5266ecfd15 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -70,7 +70,6 @@ bool require_gcm_256; /* false by default */
bool enable_negotiate_signing; /* false by default */
unsigned int global_secflags = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
-unsigned int sign_CIFS_PDUs = 1;
/*
* Global transaction id (XID) information
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 3b32116b0b49..ad7dd16db3e9 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -556,7 +556,7 @@ struct smb_version_operations {
void (*set_oplock_level)(struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch,
bool *purge_cache);
/* create lease context buffer for CREATE request */
- char * (*create_lease_buf)(u8 *lease_key, u8 oplock);
+ char * (*create_lease_buf)(u8 *lease_key, u8 oplock, u8 *parent_lease_key, __le32 le_flags);
/* parse lease context buffer and return oplock/epoch info */
__u8 (*parse_lease_buf)(void *buf, __u16 *epoch, char *lkey);
ssize_t (*copychunk_range)(const unsigned int,
@@ -773,6 +773,7 @@ struct TCP_Server_Info {
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
__u32 sequence_number; /* for signing, protected by srv_mutex */
__u32 reconnect_instance; /* incremented on each reconnect */
+ __le32 session_key_id; /* retrieved from negotiate response and send in session setup request */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
@@ -1441,6 +1442,7 @@ struct cifs_open_parms {
bool reconnect:1;
bool replay:1; /* indicates that this open is for a replay */
struct kvec *ea_cctx;
+ __le32 lease_flags;
};
struct cifs_fid {
@@ -1448,6 +1450,7 @@ struct cifs_fid {
__u64 persistent_fid; /* persist file id for smb2 */
__u64 volatile_fid; /* volatile file id for smb2 */
__u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */
+ __u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
__u8 create_guid[16];
__u32 access;
struct cifs_pending_open *pending_open;
@@ -1988,8 +1991,7 @@ require use of the stronger protocol */
* TCP_Server_Info-> TCP_Server_Info cifs_get_tcp_session
* reconnect_mutex
* TCP_Server_Info->srv_mutex TCP_Server_Info cifs_get_tcp_session
- * cifs_ses->session_mutex cifs_ses sesInfoAlloc
- * cifs_tcon
+ * cifs_ses->session_mutex cifs_ses sesInfoAlloc
* cifs_tcon->open_file_lock cifs_tcon->openFileList tconInfoAlloc
* cifs_tcon->pending_opens
* cifs_tcon->stat_lock cifs_tcon->bytes_read tconInfoAlloc
@@ -2008,21 +2010,25 @@ require use of the stronger protocol */
* ->oplock_credits
* ->reconnect_instance
* cifs_ses->ses_lock (anything that is not protected by another lock and can change)
+ * sesInfoAlloc
* cifs_ses->iface_lock cifs_ses->iface_list sesInfoAlloc
* ->iface_count
* ->iface_last_update
- * cifs_ses->chan_lock cifs_ses->chans
+ * cifs_ses->chan_lock cifs_ses->chans sesInfoAlloc
* ->chans_need_reconnect
* ->chans_in_reconnect
* cifs_tcon->tc_lock (anything that is not protected by another lock and can change)
+ * tcon_info_alloc
* inode->i_rwsem, taken by fs/netfs/locking.c e.g. should be taken before cifsInodeInfo locks
* cifsInodeInfo->open_file_lock cifsInodeInfo->openFileList cifs_alloc_inode
* cifsInodeInfo->writers_lock cifsInodeInfo->writers cifsInodeInfo_alloc
* cifsInodeInfo->lock_sem cifsInodeInfo->llist cifs_init_once
* ->can_cache_brlcks
* cifsInodeInfo->deferred_lock cifsInodeInfo->deferred_closes cifsInodeInfo_alloc
- * cached_fids->cfid_list_lock cifs_tcon->cfids->entries init_cached_dirs
- * cifsFileInfo->fh_mutex cifsFileInfo cifs_new_fileinfo
+ * cached_fids->cfid_list_lock cifs_tcon->cfids->entries init_cached_dirs
+ * cached_fid->fid_lock (anything that is not protected by another lock and can change)
+ * init_cached_dir
+ * cifsFileInfo->fh_mutex cifsFileInfo cifs_new_fileinfo
* cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo
* ->invalidHandle initiate_cifs_search
* ->oplock_break_cancelled
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index 1b79fe07476f..d9cf7db0ac35 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -597,7 +597,7 @@ typedef union smb_com_session_setup_andx {
__le16 MaxBufferSize;
__le16 MaxMpxCount;
__le16 VcNumber;
- __u32 SessionKey;
+ __le32 SessionKey;
__le16 SecurityBlobLength;
__u32 Reserved;
__le32 Capabilities; /* see below */
@@ -616,7 +616,7 @@ typedef union smb_com_session_setup_andx {
__le16 MaxBufferSize;
__le16 MaxMpxCount;
__le16 VcNumber;
- __u32 SessionKey;
+ __le32 SessionKey;
__le16 CaseInsensitivePasswordLength; /* ASCII password len */
__le16 CaseSensitivePasswordLength; /* Unicode password length*/
__u32 Reserved; /* see below */
@@ -654,7 +654,7 @@ typedef union smb_com_session_setup_andx {
__le16 MaxBufferSize;
__le16 MaxMpxCount;
__le16 VcNumber;
- __u32 SessionKey;
+ __le32 SessionKey;
__le16 PasswordLength;
__u32 Reserved; /* encrypt key len and offset */
__le16 ByteCount;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index ecf774a8f1ca..66093fa78aed 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -151,8 +151,7 @@ extern bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 eof,
bool from_readdir);
extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
unsigned int bytes_written);
-void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result,
- bool was_async);
+void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result);
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int);
extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode,
int flags,
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index f55457b4b82e..7216fcec79e8 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -498,6 +498,7 @@ CIFSSMBNegotiate(const unsigned int xid,
server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
cifs_dbg(NOISY, "Max buf = %d\n", ses->server->maxBuf);
server->capabilities = le32_to_cpu(pSMBr->Capabilities);
+ server->session_key_id = pSMBr->SessionKey;
server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
server->timeAdj *= 60;
@@ -1725,7 +1726,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
server->credits, server->in_flight,
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
- cifs_write_subrequest_terminated(wdata, result, true);
+ cifs_write_subrequest_terminated(wdata, result);
release_mid(mid);
trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index, 0,
server->credits, server->in_flight,
@@ -1813,7 +1814,7 @@ async_writev_out:
out:
if (rc) {
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, false);
+ cifs_write_subrequest_terminated(wdata, rc);
}
}
@@ -2753,10 +2754,10 @@ int cifs_query_reparse_point(const unsigned int xid,
io_req->TotalParameterCount = 0;
io_req->TotalDataCount = 0;
- io_req->MaxParameterCount = cpu_to_le32(2);
+ io_req->MaxParameterCount = cpu_to_le32(0);
/* BB find exact data count max from sess structure BB */
io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
- io_req->MaxSetupCount = 4;
+ io_req->MaxSetupCount = 1;
io_req->Reserved = 0;
io_req->ParameterOffset = 0;
io_req->DataCount = 0;
@@ -2783,6 +2784,22 @@ int cifs_query_reparse_point(const unsigned int xid,
goto error;
}
+ /* SetupCount must be 1, otherwise offset to ByteCount is incorrect. */
+ if (io_rsp->SetupCount != 1) {
+ rc = -EIO;
+ goto error;
+ }
+
+ /*
+ * ReturnedDataLen is output length of executed IOCTL.
+ * DataCount is output length transferred over network.
+ * Check that we have full FSCTL_GET_REPARSE_POINT buffer.
+ */
+ if (data_count != le16_to_cpu(io_rsp->ReturnedDataLen)) {
+ rc = -EIO;
+ goto error;
+ }
+
end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount;
start = (__u8 *)&io_rsp->hdr.Protocol + data_offset;
if (start >= end) {
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 6bf04d9a5491..024817d40c5f 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -377,6 +377,13 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
if (!cifs_tcp_ses_needs_reconnect(server, 1))
return 0;
+ /*
+ * if smb session has been marked for reconnect, also reconnect all
+ * connections. This way, the other connections do not end up bad.
+ */
+ if (mark_smb_session)
+ cifs_signal_cifsd_for_reconnect(server, mark_smb_session);
+
cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session);
cifs_abort_connection(server);
@@ -385,7 +392,8 @@ static int __cifs_reconnect(struct TCP_Server_Info *server,
try_to_freeze();
cifs_server_lock(server);
- if (!cifs_swn_set_server_dstaddr(server)) {
+ if (!cifs_swn_set_server_dstaddr(server) &&
+ !SERVER_IS_CHAN(server)) {
/* resolve the hostname again to make sure that IP address is up-to-date */
rc = reconn_set_ipaddr_from_hostname(server);
cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc);
diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c
index d1e95632ac54..1c6e5389c51f 100644
--- a/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@ -23,6 +23,7 @@
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "fscache.h"
+#include "cached_dir.h"
static void
renew_parental_timestamps(struct dentry *direntry)
@@ -190,6 +191,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
struct TCP_Server_Info *server = tcon->ses->server;
struct cifs_open_parms oparms;
int rdwr_for_fscache = 0;
+ __le32 lease_flags = 0;
*oplock = 0;
if (tcon->ses->server->oplocks)
@@ -312,6 +314,26 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned
create_options |= CREATE_OPTION_READONLY;
retry_open:
+ if (tcon->cfids && direntry->d_parent && server->dialect >= SMB30_PROT_ID) {
+ struct cached_fid *parent_cfid;
+
+ spin_lock(&tcon->cfids->cfid_list_lock);
+ list_for_each_entry(parent_cfid, &tcon->cfids->entries, entry) {
+ if (parent_cfid->dentry == direntry->d_parent) {
+ cifs_dbg(FYI, "found a parent cached file handle\n");
+ if (parent_cfid->has_lease && parent_cfid->time) {
+ lease_flags
+ |= SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE;
+ memcpy(fid->parent_lease_key,
+ parent_cfid->fid.lease_key,
+ SMB2_LEASE_KEY_SIZE);
+ }
+ break;
+ }
+ }
+ spin_unlock(&tcon->cfids->cfid_list_lock);
+ }
+
oparms = (struct cifs_open_parms) {
.tcon = tcon,
.cifs_sb = cifs_sb,
@@ -320,6 +342,7 @@ retry_open:
.disposition = disposition,
.path = full_path,
.fid = fid,
+ .lease_flags = lease_flags,
.mode = mode,
};
rc = server->ops->open(xid, &oparms, oplock, buf);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 950aa4f912f5..d2df10b8e6fd 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -130,7 +130,7 @@ fail:
else
trace_netfs_sreq(subreq, netfs_sreq_trace_fail);
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, false);
+ cifs_write_subrequest_terminated(wdata, rc);
goto out;
}
@@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
goto failed;
}
- if (subreq->rreq->origin != NETFS_DIO_READ)
+ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
@@ -2423,8 +2424,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
return rc;
}
-void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result,
- bool was_async)
+void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result)
{
struct netfs_io_request *wreq = wdata->rreq;
struct netfs_inode *ictx = netfs_inode(wreq->inode);
@@ -2441,7 +2441,7 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t
netfs_resize_file(ictx, wrend, true);
}
- netfs_write_subrequest_terminated(&wdata->subreq, result, was_async);
+ netfs_write_subrequest_terminated(&wdata->subreq, result);
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 7b6ed9b23e71..e77017f47084 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -326,6 +326,14 @@ check_smb_hdr(struct smb_hdr *smb)
if (smb->Command == SMB_COM_LOCKING_ANDX)
return 0;
+ /*
+ * Windows NT server returns error resposne (e.g. STATUS_DELETE_PENDING
+ * or STATUS_OBJECT_NAME_NOT_FOUND or ERRDOS/ERRbadfile or any other)
+ * for some TRANS2 requests without the RESPONSE flag set in header.
+ */
+ if (smb->Command == SMB_COM_TRANSACTION2 && smb->Status.CifsError != 0)
+ return 0;
+
cifs_dbg(VFS, "Server sent request, not response. mid=%u\n",
get_mid(smb));
return 1;
diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c
index e3f9213131c4..52a520349cb7 100644
--- a/fs/smb/client/namespace.c
+++ b/fs/smb/client/namespace.c
@@ -146,6 +146,9 @@ static char *automount_fullpath(struct dentry *dentry, void *page)
}
spin_unlock(&tcon->tc_lock);
+ if (unlikely(!page))
+ return ERR_PTR(-ENOMEM);
+
s = dentry_path_raw(dentry, page, PATH_MAX);
if (IS_ERR(s))
return s;
@@ -283,7 +286,6 @@ struct vfsmount *cifs_d_automount(struct path *path)
return newmnt;
}
- mntget(newmnt); /* prevent immediate expiration */
mnt_set_expiry(newmnt, &cifs_automount_list);
schedule_delayed_work(&cifs_automount_task,
cifs_mountpoint_expiry_timeout);
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index b3fa9ee26912..ec0db32c7d98 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -445,6 +445,10 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
ses->chans[chan_index].iface = iface;
spin_unlock(&ses->chan_lock);
+
+ spin_lock(&server->srv_lock);
+ memcpy(&server->dstaddr, &iface->sockaddr, sizeof(server->dstaddr));
+ spin_unlock(&server->srv_lock);
}
static int
@@ -628,6 +632,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses,
USHRT_MAX));
pSMB->req.MaxMpxCount = cpu_to_le16(server->maxReq);
pSMB->req.VcNumber = cpu_to_le16(1);
+ pSMB->req.SessionKey = server->session_key_id;
/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
@@ -1684,22 +1689,22 @@ _sess_auth_rawntlmssp_assemble_req(struct sess_data *sess_data)
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
capabilities = cifs_ssetup_hdr(ses, server, pSMB);
- if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
- cifs_dbg(VFS, "NTLMSSP requires Unicode support\n");
- return -ENOSYS;
- }
-
pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
capabilities |= CAP_EXTENDED_SECURITY;
pSMB->req.Capabilities |= cpu_to_le32(capabilities);
bcc_ptr = sess_data->iov[2].iov_base;
- /* unicode strings must be word aligned */
- if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) {
- *bcc_ptr = 0;
- bcc_ptr++;
+
+ if (pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) {
+ /* unicode strings must be word aligned */
+ if (!IS_ALIGNED(sess_data->iov[0].iov_len + sess_data->iov[1].iov_len, 2)) {
+ *bcc_ptr = 0;
+ bcc_ptr++;
+ }
+ unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
+ } else {
+ ascii_oslm_strings(&bcc_ptr, sess_data->nls_cp);
}
- unicode_oslm_strings(&bcc_ptr, sess_data->nls_cp);
sess_data->iov[2].iov_len = (long) bcc_ptr -
(long) sess_data->iov[2].iov_base;
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 2fe8eeb98535..bab9f567d9b7 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4069,7 +4069,7 @@ map_oplock_to_lease(u8 oplock)
}
static char *
-smb2_create_lease_buf(u8 *lease_key, u8 oplock)
+smb2_create_lease_buf(u8 *lease_key, u8 oplock, u8 *parent_lease_key, __le32 flags)
{
struct create_lease *buf;
@@ -4095,7 +4095,7 @@ smb2_create_lease_buf(u8 *lease_key, u8 oplock)
}
static char *
-smb3_create_lease_buf(u8 *lease_key, u8 oplock)
+smb3_create_lease_buf(u8 *lease_key, u8 oplock, u8 *parent_lease_key, __le32 flags)
{
struct create_lease_v2 *buf;
@@ -4105,6 +4105,9 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock)
memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE);
buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
+ buf->lcontext.LeaseFlags = flags;
+ if (flags & SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE)
+ memcpy(&buf->lcontext.ParentLeaseKey, parent_lease_key, SMB2_LEASE_KEY_SIZE);
buf->ccontext.DataOffset = cpu_to_le16(offsetof
(struct create_lease_v2, lcontext));
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 4e28632b5fd6..0c320d06809c 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -2392,11 +2392,16 @@ static int
add_lease_context(struct TCP_Server_Info *server,
struct smb2_create_req *req,
struct kvec *iov,
- unsigned int *num_iovec, u8 *lease_key, __u8 *oplock)
+ unsigned int *num_iovec,
+ u8 *lease_key,
+ __u8 *oplock,
+ u8 *parent_lease_key,
+ __le32 flags)
{
unsigned int num = *num_iovec;
- iov[num].iov_base = server->ops->create_lease_buf(lease_key, *oplock);
+ iov[num].iov_base = server->ops->create_lease_buf(lease_key, *oplock,
+ parent_lease_key, flags);
if (iov[num].iov_base == NULL)
return -ENOMEM;
iov[num].iov_len = server->vals->create_lease_size;
@@ -3069,7 +3074,9 @@ SMB2_open_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
req->RequestedOplockLevel = *oplock; /* no srv lease support */
else {
rc = add_lease_context(server, req, iov, &n_iov,
- oparms->fid->lease_key, oplock);
+ oparms->fid->lease_key, oplock,
+ oparms->fid->parent_lease_key,
+ oparms->lease_flags);
if (rc)
return rc;
}
@@ -4888,7 +4895,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
- cifs_write_subrequest_terminated(wdata, result ?: written, true);
+ cifs_write_subrequest_terminated(wdata, result ?: written);
release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
server->credits, server->in_flight,
@@ -5061,7 +5068,7 @@ out:
-(int)wdata->credits.value,
cifs_trace_rw_credits_write_response_clear);
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, true);
+ cifs_write_subrequest_terminated(wdata, rc);
}
}
@@ -5917,71 +5924,6 @@ posix_qfsinf_exit:
}
int
-SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, struct kstatfs *fsdata)
-{
- struct smb_rqst rqst;
- struct smb2_query_info_rsp *rsp = NULL;
- struct kvec iov;
- struct kvec rsp_iov;
- int rc = 0;
- int resp_buftype;
- struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server;
- struct smb2_fs_full_size_info *info = NULL;
- int flags = 0;
- int retries = 0, cur_sleep = 1;
-
-replay_again:
- /* reinitialize for possible replay */
- flags = 0;
- server = cifs_pick_channel(ses);
-
- rc = build_qfs_info_req(&iov, tcon, server,
- FS_FULL_SIZE_INFORMATION,
- sizeof(struct smb2_fs_full_size_info),
- persistent_fid, volatile_fid);
- if (rc)
- return rc;
-
- if (smb3_encryption_required(tcon))
- flags |= CIFS_TRANSFORM_REQ;
-
- memset(&rqst, 0, sizeof(struct smb_rqst));
- rqst.rq_iov = &iov;
- rqst.rq_nvec = 1;
-
- if (retries)
- smb2_set_replay(server, &rqst);
-
- rc = cifs_send_recv(xid, ses, server,
- &rqst, &resp_buftype, flags, &rsp_iov);
- free_qfs_info_req(&iov);
- if (rc) {
- cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
- goto qfsinf_exit;
- }
- rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
-
- info = (struct smb2_fs_full_size_info *)(
- le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
- rc = smb2_validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
- le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
- sizeof(struct smb2_fs_full_size_info));
- if (!rc)
- smb2_copy_fs_info_to_kstatfs(info, fsdata);
-
-qfsinf_exit:
- free_rsp_buf(resp_buftype, rsp_iov.iov_base);
-
- if (is_replayable_error(rc) &&
- smb2_should_replay(tcon, &retries, &cur_sleep))
- goto replay_again;
-
- return rc;
-}
-
-int
SMB2_QFS_attr(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, int level)
{
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index 4662c7e2d259..035aa1624053 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -259,9 +259,6 @@ extern int smb2_handle_cancelled_close(struct cifs_tcon *tcon,
__u64 volatile_fid);
extern int smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server);
void smb2_cancelled_close_fid(struct work_struct *work);
-extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_file_id, u64 volatile_file_id,
- struct kstatfs *FSData);
extern int SMB311_posix_qfs_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct kstatfs *FSData);
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index b1091e70434a..a9602aae21ef 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -149,6 +149,27 @@ config SQUASHFS_XATTR
If unsure, say N.
+config SQUASHFS_COMP_CACHE_FULL
+ bool "Enable full caching of compressed blocks"
+ depends on SQUASHFS
+ default n
+ help
+ This option enables caching of all compressed blocks, Without caching,
+ repeated reads of the same files trigger excessive disk I/O, significantly
+ reducinng performance in workloads like fio-based benchmarks.
+
+ For example, fio tests (iodepth=1, numjobs=1, ioengine=psync) show:
+ With caching: IOPS=2223, BW=278MiB/s (291MB/s)
+ Without caching: IOPS=815, BW=102MiB/s (107MB/s)
+
+ Enabling this option restores performance to pre-regression levels by
+ caching all compressed blocks in the page cache, reducing disk I/O for
+ repeated reads. However, this increases memory usage, which may be a
+ concern in memory-constrained environments.
+
+ Enable this option if your workload involves frequent repeated reads and
+ memory usage is not a limiting factor. If unsure, say N.
+
config SQUASHFS_ZLIB
bool "Include support for ZLIB compressed file systems"
depends on SQUASHFS
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2dc730800f44..3061043e915c 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -88,6 +88,10 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
struct bio_vec *bv;
int idx = 0;
int err = 0;
+#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
+ struct page **cache_pages = kmalloc_array(page_count,
+ sizeof(void *), GFP_KERNEL | __GFP_ZERO);
+#endif
bio_for_each_segment_all(bv, fullbio, iter_all) {
struct page *page = bv->bv_page;
@@ -110,6 +114,11 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
head_to_cache = page;
else if (idx == page_count - 1 && index + length != read_end)
tail_to_cache = page;
+#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
+ /* Cache all pages in the BIO for repeated reads */
+ else if (cache_pages)
+ cache_pages[idx] = page;
+#endif
if (!bio || idx != end_idx) {
struct bio *new = bio_alloc_clone(bdev, fullbio,
@@ -163,6 +172,25 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
}
}
+#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
+ if (!cache_pages)
+ goto out;
+
+ for (idx = 0; idx < page_count; idx++) {
+ if (!cache_pages[idx])
+ continue;
+ int ret = add_to_page_cache_lru(cache_pages[idx], cache_mapping,
+ (read_start >> PAGE_SHIFT) + idx,
+ GFP_NOIO);
+
+ if (!ret) {
+ SetPageUptodate(cache_pages[idx]);
+ unlock_page(cache_pages[idx]);
+ }
+ }
+ kfree(cache_pages);
+out:
+#endif
return 0;
}
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 67c55fe32ce8..992ea0e37257 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -202,6 +202,11 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
+ if (!msblk->devblksize) {
+ errorf(fc, "squashfs: unable to set blocksize\n");
+ return -EINVAL;
+ }
+
msblk->devblksize_log2 = ffz(~msblk->devblksize);
mutex_init(&msblk->meta_index_mutex);
diff --git a/fs/super.c b/fs/super.c
index bcc4e87123c8..21799e213fd7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -824,13 +824,6 @@ struct super_block *sget(struct file_system_type *type,
struct super_block *old;
int err;
- /* We don't yet pass the user namespace of the parent
- * mount through to here so always use &init_user_ns
- * until that changes.
- */
- if (flags & SB_SUBMOUNT)
- user_ns = &init_user_ns;
-
retry:
spin_lock(&sb_lock);
if (test) {
@@ -850,7 +843,7 @@ retry:
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
+ s = alloc_super(type, flags, user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 762699c1bcf6..eea718ac66b4 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -83,11 +83,11 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/log2.h>
-#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/iversion.h>
@@ -289,7 +289,7 @@ void ufs_error (struct super_block * sb, const char * function,
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- switch (UFS_SB(sb)->s_mount_opt & UFS_MOUNT_ONERROR) {
+ switch (UFS_SB(sb)->s_on_err) {
case UFS_MOUNT_ONERROR_PANIC:
panic("panic (device %s): %s: %pV\n",
sb->s_id, function, &vaf);
@@ -342,124 +342,74 @@ void ufs_warning (struct super_block * sb, const char * function,
va_end(args);
}
-enum {
- Opt_type_old = UFS_MOUNT_UFSTYPE_OLD,
- Opt_type_sunx86 = UFS_MOUNT_UFSTYPE_SUNx86,
- Opt_type_sun = UFS_MOUNT_UFSTYPE_SUN,
- Opt_type_sunos = UFS_MOUNT_UFSTYPE_SUNOS,
- Opt_type_44bsd = UFS_MOUNT_UFSTYPE_44BSD,
- Opt_type_ufs2 = UFS_MOUNT_UFSTYPE_UFS2,
- Opt_type_hp = UFS_MOUNT_UFSTYPE_HP,
- Opt_type_nextstepcd = UFS_MOUNT_UFSTYPE_NEXTSTEP_CD,
- Opt_type_nextstep = UFS_MOUNT_UFSTYPE_NEXTSTEP,
- Opt_type_openstep = UFS_MOUNT_UFSTYPE_OPENSTEP,
- Opt_onerror_panic = UFS_MOUNT_ONERROR_PANIC,
- Opt_onerror_lock = UFS_MOUNT_ONERROR_LOCK,
- Opt_onerror_umount = UFS_MOUNT_ONERROR_UMOUNT,
- Opt_onerror_repair = UFS_MOUNT_ONERROR_REPAIR,
- Opt_err
+enum { Opt_type, Opt_onerror };
+
+static const struct constant_table ufs_param_ufstype[] = {
+ {"old", UFS_MOUNT_UFSTYPE_OLD},
+ {"sunx86", UFS_MOUNT_UFSTYPE_SUNx86},
+ {"sun", UFS_MOUNT_UFSTYPE_SUN},
+ {"sunos", UFS_MOUNT_UFSTYPE_SUNOS},
+ {"44bsd", UFS_MOUNT_UFSTYPE_44BSD},
+ {"ufs2", UFS_MOUNT_UFSTYPE_UFS2},
+ {"5xbsd", UFS_MOUNT_UFSTYPE_UFS2},
+ {"hp", UFS_MOUNT_UFSTYPE_HP},
+ {"nextstep-cd", UFS_MOUNT_UFSTYPE_NEXTSTEP_CD},
+ {"nextstep", UFS_MOUNT_UFSTYPE_NEXTSTEP},
+ {"openstep", UFS_MOUNT_UFSTYPE_OPENSTEP},
+ {}
};
-static const match_table_t tokens = {
- {Opt_type_old, "ufstype=old"},
- {Opt_type_sunx86, "ufstype=sunx86"},
- {Opt_type_sun, "ufstype=sun"},
- {Opt_type_sunos, "ufstype=sunos"},
- {Opt_type_44bsd, "ufstype=44bsd"},
- {Opt_type_ufs2, "ufstype=ufs2"},
- {Opt_type_ufs2, "ufstype=5xbsd"},
- {Opt_type_hp, "ufstype=hp"},
- {Opt_type_nextstepcd, "ufstype=nextstep-cd"},
- {Opt_type_nextstep, "ufstype=nextstep"},
- {Opt_type_openstep, "ufstype=openstep"},
-/*end of possible ufs types */
- {Opt_onerror_panic, "onerror=panic"},
- {Opt_onerror_lock, "onerror=lock"},
- {Opt_onerror_umount, "onerror=umount"},
- {Opt_onerror_repair, "onerror=repair"},
- {Opt_err, NULL}
+static const struct constant_table ufs_param_onerror[] = {
+ {"panic", UFS_MOUNT_ONERROR_PANIC},
+ {"lock", UFS_MOUNT_ONERROR_LOCK},
+ {"umount", UFS_MOUNT_ONERROR_UMOUNT},
+ {"repair", UFS_MOUNT_ONERROR_REPAIR},
+ {}
};
-static int ufs_parse_options (char * options, unsigned * mount_options)
+static const struct fs_parameter_spec ufs_param_spec[] = {
+ fsparam_enum ("ufstype", Opt_type, ufs_param_ufstype),
+ fsparam_enum ("onerror", Opt_onerror, ufs_param_onerror),
+ {}
+};
+
+struct ufs_fs_context {
+ unsigned int flavour, on_err;
+};
+
+static int ufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char * p;
-
+ struct ufs_fs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
+
UFSD("ENTER\n");
-
- if (!options)
- return 1;
- while ((p = strsep(&options, ",")) != NULL) {
- substring_t args[MAX_OPT_ARGS];
- int token;
- if (!*p)
- continue;
+ opt = fs_parse(fc, ufs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_type_old:
- ufs_clear_opt (*mount_options, UFSTYPE);
- ufs_set_opt (*mount_options, UFSTYPE_OLD);
- break;
- case Opt_type_sunx86:
- ufs_clear_opt (*mount_options, UFSTYPE);
- ufs_set_opt (*mount_options, UFSTYPE_SUNx86);
- break;
- case Opt_type_sun:
- ufs_clear_opt (*mount_options, UFSTYPE);
- ufs_set_opt (*mount_options, UFSTYPE_SUN);
- break;
- case Opt_type_sunos:
- ufs_clear_opt(*mount_options, UFSTYPE);
- ufs_set_opt(*mount_options, UFSTYPE_SUNOS);
- break;
- case Opt_type_44bsd:
- ufs_clear_opt (*mount_options, UFSTYPE);
- ufs_set_opt (*mount_options, UFSTYPE_44BSD);
- break;
- case Opt_type_ufs2:
- ufs_clear_opt(*mount_options, UFSTYPE);
- ufs_set_opt(*mount_options, UFSTYPE_UFS2);
- break;
- case Opt_type_hp:
- ufs_clear_opt (*mount_options, UFSTYPE);
- ufs_set_opt (*mount_options, UFSTYPE_HP);
- break;
- case Opt_type_nextstepcd:
- ufs_clear_opt (*mount_options, UFSTYPE);
- ufs_set_opt (*mount_options, UFSTYPE_NEXTSTEP_CD);
- break;
- case Opt_type_nextstep:
- ufs_clear_opt (*mount_options, UFSTYPE);
- ufs_set_opt (*mount_options, UFSTYPE_NEXTSTEP);
- break;
- case Opt_type_openstep:
- ufs_clear_opt (*mount_options, UFSTYPE);
- ufs_set_opt (*mount_options, UFSTYPE_OPENSTEP);
- break;
- case Opt_onerror_panic:
- ufs_clear_opt (*mount_options, ONERROR);
- ufs_set_opt (*mount_options, ONERROR_PANIC);
- break;
- case Opt_onerror_lock:
- ufs_clear_opt (*mount_options, ONERROR);
- ufs_set_opt (*mount_options, ONERROR_LOCK);
- break;
- case Opt_onerror_umount:
- ufs_clear_opt (*mount_options, ONERROR);
- ufs_set_opt (*mount_options, ONERROR_UMOUNT);
- break;
- case Opt_onerror_repair:
- pr_err("Unable to do repair on error, will lock lock instead\n");
- ufs_clear_opt (*mount_options, ONERROR);
- ufs_set_opt (*mount_options, ONERROR_REPAIR);
- break;
- default:
- pr_err("Invalid option: \"%s\" or missing value\n", p);
+ switch (opt) {
+ case Opt_type:
+ if (ctx->flavour == result.uint_32) /* no-op */
return 0;
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ pr_err("ufstype can't be changed during remount\n");
+ return -EINVAL;
}
+ if (!ctx->flavour) {
+ pr_err("conflicting ufstype options\n");
+ return -EINVAL;
+ }
+ ctx->flavour = result.uint_32;
+ break;
+ case Opt_onerror:
+ ctx->on_err = result.uint_32;
+ break;
+ default:
+ return -EINVAL;
}
- return 1;
+ return 0;
}
/*
@@ -474,7 +424,7 @@ static void ufs_setup_cstotal(struct super_block *sb)
struct ufs_super_block_first *usb1;
struct ufs_super_block_second *usb2;
struct ufs_super_block_third *usb3;
- unsigned mtype = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
+ unsigned mtype = sbi->s_flavour;
UFSD("ENTER, mtype=%u\n", mtype);
usb1 = ubh_get_usb_first(uspi);
@@ -580,7 +530,7 @@ failed:
*/
static void ufs_put_cstotal(struct super_block *sb)
{
- unsigned mtype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE;
+ unsigned mtype = UFS_SB(sb)->s_flavour;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct ufs_super_block_first *usb1;
struct ufs_super_block_second *usb2;
@@ -764,8 +714,10 @@ static u64 ufs_max_bytes(struct super_block *sb)
return res << uspi->s_bshift;
}
-static int ufs_fill_super(struct super_block *sb, void *data, int silent)
+static int ufs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct ufs_fs_context *ctx = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
struct ufs_sb_info * sbi;
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
@@ -803,24 +755,18 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->s_lock);
spin_lock_init(&sbi->work_lock);
INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
- /*
- * Set default mount options
- * Parse mount options
- */
- sbi->s_mount_opt = 0;
- ufs_set_opt (sbi->s_mount_opt, ONERROR_LOCK);
- if (!ufs_parse_options ((char *) data, &sbi->s_mount_opt)) {
- pr_err("wrong mount options\n");
- goto failed;
- }
- if (!(sbi->s_mount_opt & UFS_MOUNT_UFSTYPE)) {
+
+ sbi->s_flavour = ctx->flavour;
+ sbi->s_on_err = ctx->on_err;
+
+ if (!sbi->s_flavour) {
if (!silent)
pr_err("You didn't specify the type of your ufs filesystem\n\n"
"mount -t ufs -o ufstype="
"sun|sunx86|44bsd|ufs2|5xbsd|old|hp|nextstep|nextstep-cd|openstep ...\n\n"
">>>WARNING<<< Wrong ufstype may corrupt your filesystem, "
"default is ufstype=old\n");
- ufs_set_opt (sbi->s_mount_opt, UFSTYPE_OLD);
+ sbi->s_flavour = UFS_MOUNT_UFSTYPE_OLD;
}
uspi = kzalloc(sizeof(struct ufs_sb_private_info), GFP_KERNEL);
@@ -836,7 +782,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_time_min = S32_MIN;
sb->s_time_max = S32_MAX;
- switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) {
+ switch (sbi->s_flavour) {
case UFS_MOUNT_UFSTYPE_44BSD:
UFSD("ufstype=44bsd\n");
uspi->s_fsize = block_size = 512;
@@ -1035,9 +981,9 @@ again:
goto magic_found;
}
- if ((((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == UFS_MOUNT_UFSTYPE_NEXTSTEP)
- || ((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == UFS_MOUNT_UFSTYPE_NEXTSTEP_CD)
- || ((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == UFS_MOUNT_UFSTYPE_OPENSTEP))
+ if ((sbi->s_flavour == UFS_MOUNT_UFSTYPE_NEXTSTEP
+ || sbi->s_flavour == UFS_MOUNT_UFSTYPE_NEXTSTEP_CD
+ || sbi->s_flavour == UFS_MOUNT_UFSTYPE_OPENSTEP)
&& uspi->s_sbbase < 256) {
ubh_brelse_uspi(uspi);
ubh = NULL;
@@ -1237,8 +1183,8 @@ magic_found:
uspi->s_bpf = uspi->s_fsize << 3;
uspi->s_bpfshift = uspi->s_fshift + 3;
uspi->s_bpfmask = uspi->s_bpf - 1;
- if ((sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == UFS_MOUNT_UFSTYPE_44BSD ||
- (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) == UFS_MOUNT_UFSTYPE_UFS2)
+ if (sbi->s_flavour == UFS_MOUNT_UFSTYPE_44BSD ||
+ sbi->s_flavour == UFS_MOUNT_UFSTYPE_UFS2)
uspi->s_maxsymlinklen =
fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen);
@@ -1290,13 +1236,15 @@ failed_nomem:
return -ENOMEM;
}
-static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
+static int ufs_reconfigure(struct fs_context *fc)
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
struct ufs_super_block_third * usb3;
- unsigned new_mount_opt, ufstype;
- unsigned flags;
+ struct ufs_fs_context *ctx = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
+ unsigned int ufstype;
+ unsigned int flags;
sync_filesystem(sb);
mutex_lock(&UFS_SB(sb)->s_lock);
@@ -1305,27 +1253,10 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
usb1 = ubh_get_usb_first(uspi);
usb3 = ubh_get_usb_third(uspi);
- /*
- * Allow the "check" option to be passed as a remount option.
- * It is not possible to change ufstype option during remount
- */
- ufstype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE;
- new_mount_opt = 0;
- ufs_set_opt (new_mount_opt, ONERROR_LOCK);
- if (!ufs_parse_options (data, &new_mount_opt)) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
- return -EINVAL;
- }
- if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
- new_mount_opt |= ufstype;
- } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
- pr_err("ufstype can't be changed during remount\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
- return -EINVAL;
- }
+ ufstype = UFS_SB(sb)->s_flavour;
- if ((bool)(*mount_flags & SB_RDONLY) == sb_rdonly(sb)) {
- UFS_SB(sb)->s_mount_opt = new_mount_opt;
+ if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) {
+ UFS_SB(sb)->s_on_err = ctx->on_err;
mutex_unlock(&UFS_SB(sb)->s_lock);
return 0;
}
@@ -1333,7 +1264,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
/*
* fs was mouted as rw, remounting ro
*/
- if (*mount_flags & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
ufs_put_super_internal(sb);
usb1->fs_time = ufs_get_seconds(sb);
if ((flags & UFS_ST_MASK) == UFS_ST_SUN
@@ -1369,7 +1300,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
sb->s_flags &= ~SB_RDONLY;
#endif
}
- UFS_SB(sb)->s_mount_opt = new_mount_opt;
+ UFS_SB(sb)->s_on_err = ctx->on_err;
mutex_unlock(&UFS_SB(sb)->s_lock);
return 0;
}
@@ -1377,19 +1308,19 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
static int ufs_show_options(struct seq_file *seq, struct dentry *root)
{
struct ufs_sb_info *sbi = UFS_SB(root->d_sb);
- unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
- const struct match_token *tp = tokens;
+ unsigned mval = sbi->s_flavour;
+ const struct constant_table *tp;
- while (tp->token != Opt_onerror_panic && tp->token != mval)
+ tp = ufs_param_ufstype;
+ while (tp->value && tp->value != mval)
++tp;
- BUG_ON(tp->token == Opt_onerror_panic);
- seq_printf(seq, ",%s", tp->pattern);
+ seq_printf(seq, ",ufstype=%s", tp->name);
- mval = sbi->s_mount_opt & UFS_MOUNT_ONERROR;
- while (tp->token != Opt_err && tp->token != mval)
+ tp = ufs_param_onerror;
+ mval = sbi->s_on_err;
+ while (tp->value && tp->value != mval)
++tp;
- BUG_ON(tp->token == Opt_err);
- seq_printf(seq, ",%s", tp->pattern);
+ seq_printf(seq, ",onerror=%s", tp->name);
return 0;
}
@@ -1483,21 +1414,57 @@ static const struct super_operations ufs_super_ops = {
.put_super = ufs_put_super,
.sync_fs = ufs_sync_fs,
.statfs = ufs_statfs,
- .remount_fs = ufs_remount,
.show_options = ufs_show_options,
};
-static struct dentry *ufs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int ufs_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, ufs_fill_super);
+}
+
+static void ufs_free_fc(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations ufs_context_ops = {
+ .parse_param = ufs_parse_param,
+ .get_tree = ufs_get_tree,
+ .reconfigure = ufs_reconfigure,
+ .free = ufs_free_fc,
+};
+
+static int ufs_init_fs_context(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
+ struct ufs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ struct super_block *sb = fc->root->d_sb;
+ struct ufs_sb_info *sbi = UFS_SB(sb);
+
+ ctx->flavour = sbi->s_flavour;
+ ctx->on_err = sbi->s_on_err;
+ } else {
+ ctx->flavour = 0;
+ ctx->on_err = UFS_MOUNT_ONERROR_LOCK;
+ }
+
+ fc->fs_private = ctx;
+ fc->ops = &ufs_context_ops;
+
+ return 0;
}
static struct file_system_type ufs_fs_type = {
.owner = THIS_MODULE,
.name = "ufs",
- .mount = ufs_mount,
.kill_sb = kill_block_super,
+ .init_fs_context = ufs_init_fs_context,
+ .parameters = ufs_param_spec,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ufs");
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index e7df65dd4351..788e025056b2 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -24,7 +24,8 @@ struct ufs_sb_info {
struct ufs_cg_private_info * s_ucpi[UFS_MAX_GROUP_LOADED];
unsigned s_cgno[UFS_MAX_GROUP_LOADED];
unsigned short s_cg_loaded;
- unsigned s_mount_opt;
+ unsigned s_flavour;
+ unsigned s_on_err;
struct super_block *sb;
int work_queued; /* non-zero if the delayed work is queued */
struct delayed_work sync_work; /* FS sync delayed work */
@@ -52,13 +53,11 @@ struct ufs_inode_info {
};
/* mount options */
-#define UFS_MOUNT_ONERROR 0x0000000F
#define UFS_MOUNT_ONERROR_PANIC 0x00000001
#define UFS_MOUNT_ONERROR_LOCK 0x00000002
#define UFS_MOUNT_ONERROR_UMOUNT 0x00000004
#define UFS_MOUNT_ONERROR_REPAIR 0x00000008
-#define UFS_MOUNT_UFSTYPE 0x0000FFF0
#define UFS_MOUNT_UFSTYPE_OLD 0x00000010
#define UFS_MOUNT_UFSTYPE_44BSD 0x00000020
#define UFS_MOUNT_UFSTYPE_SUN 0x00000040
@@ -70,10 +69,6 @@ struct ufs_inode_info {
#define UFS_MOUNT_UFSTYPE_UFS2 0x00001000
#define UFS_MOUNT_UFSTYPE_SUNOS 0x00002000
-#define ufs_clear_opt(o,opt) o &= ~UFS_MOUNT_##opt
-#define ufs_set_opt(o,opt) o |= UFS_MOUNT_##opt
-#define ufs_test_opt(o,opt) ((o) & UFS_MOUNT_##opt)
-
/*
* Debug code
*/
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 26a04a783489..63151feb9c3f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -436,6 +436,25 @@ allocate_blocks:
return 0;
}
+static bool
+xfs_ioend_needs_wq_completion(
+ struct iomap_ioend *ioend)
+{
+ /* Changing inode size requires a transaction. */
+ if (xfs_ioend_is_append(ioend))
+ return true;
+
+ /* Extent manipulation requires a transaction. */
+ if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))
+ return true;
+
+ /* Page cache invalidation cannot be done in irq context. */
+ if (ioend->io_flags & IOMAP_IOEND_DONTCACHE)
+ return true;
+
+ return false;
+}
+
static int
xfs_submit_ioend(
struct iomap_writepage_ctx *wpc,
@@ -460,8 +479,7 @@ xfs_submit_ioend(
memalloc_nofs_restore(nofs_flag);
/* send ioends that might require a transaction to the completion wq */
- if (xfs_ioend_is_append(ioend) ||
- (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)))
+ if (xfs_ioend_needs_wq_completion(ioend))
ioend->io_bio.bi_end_io = xfs_end_bio;
if (status)
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index d613a4094db6..9c00fc5baa30 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -290,8 +290,6 @@ xfs_zone_gc_query_cb(
return 0;
}
-#define cmp_int(l, r) ((l > r) - (l < r))
-
static int
xfs_zone_gc_rmap_rec_cmp(
const void *a,