summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c5
-rw-r--r--fs/afs/write.c9
-rw-r--r--fs/bcachefs/util.h3
-rw-r--r--fs/cachefiles/io.c16
-rw-r--r--fs/ceph/addr.c6
-rw-r--r--fs/configfs/Kconfig1
-rw-r--r--fs/dax.c5
-rw-r--r--fs/erofs/fscache.c6
-rw-r--r--fs/exec.c69
-rw-r--r--fs/fuse/dev.c182
-rw-r--r--fs/fuse/dev_uring.c34
-rw-r--r--fs/fuse/dir.c46
-rw-r--r--fs/fuse/file.c474
-rw-r--r--fs/fuse/fuse_dev_i.h9
-rw-r--r--fs/fuse/fuse_i.h10
-rw-r--r--fs/fuse/inode.c11
-rw-r--r--fs/fuse/readdir.c36
-rw-r--r--fs/gfs2/ops_fstype.c4
-rw-r--r--fs/gfs2/sys.c1
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/jfs/jfs_metapage.c106
-rw-r--r--fs/netfs/buffered_read.c56
-rw-r--r--fs/netfs/buffered_write.c5
-rw-r--r--fs/netfs/direct_read.c16
-rw-r--r--fs/netfs/direct_write.c12
-rw-r--r--fs/netfs/fscache_io.c10
-rw-r--r--fs/netfs/internal.h42
-rw-r--r--fs/netfs/main.c1
-rw-r--r--fs/netfs/misc.c219
-rw-r--r--fs/netfs/objects.c48
-rw-r--r--fs/netfs/read_collect.c199
-rw-r--r--fs/netfs/read_pgpriv2.c4
-rw-r--r--fs/netfs/read_retry.c26
-rw-r--r--fs/netfs/read_single.c6
-rw-r--r--fs/netfs/write_collect.c83
-rw-r--r--fs/netfs/write_issue.c38
-rw-r--r--fs/netfs/write_retry.c19
-rw-r--r--fs/nfs/fscache.c1
-rw-r--r--fs/nilfs2/btree.c4
-rw-r--r--fs/nilfs2/direct.c3
-rw-r--r--fs/nilfs2/mdt.c2
-rw-r--r--fs/nilfs2/segment.c16
-rw-r--r--fs/nilfs2/segment.h1
-rw-r--r--fs/ntfs3/file.c31
-rw-r--r--fs/ocfs2/cluster/tcp.c2
-rw-r--r--fs/ocfs2/filecheck.c2
-rw-r--r--fs/ocfs2/quota_local.c2
-rw-r--r--fs/ocfs2/stackglue.c3
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/proc/base.c12
-rw-r--r--fs/proc/page.c161
-rw-r--r--fs/proc/task_mmu.c29
-rw-r--r--fs/proc/task_nommu.c4
-rw-r--r--fs/smb/client/cifsproto.h3
-rw-r--r--fs/smb/client/cifssmb.c4
-rw-r--r--fs/smb/client/file.c10
-rw-r--r--fs/smb/client/smb2pdu.c4
-rw-r--r--fs/squashfs/Kconfig21
-rw-r--r--fs/squashfs/block.c28
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/xfs/xfs_aops.c22
-rw-r--r--fs/xfs/xfs_zone_gc.c2
62 files changed, 1056 insertions, 1138 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 1286d96a29bc..862164181bac 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -59,7 +59,7 @@ static void v9fs_issue_write(struct netfs_io_subrequest *subreq)
len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err);
if (len > 0)
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- netfs_write_subrequest_terminated(subreq, len ?: err, false);
+ netfs_write_subrequest_terminated(subreq, len ?: err);
}
/**
@@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
/* if we just extended the file size, any portion not in
* cache won't be on server and is zeroes */
- if (subreq->rreq->origin != NETFS_DIO_READ)
+ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (pos + total >= i_size_read(rreq->inode))
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 18b0a9f1615e..2e7526ea883a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -120,17 +120,17 @@ static void afs_issue_write_worker(struct work_struct *work)
#if 0 // Error injection
if (subreq->debug_index == 3)
- return netfs_write_subrequest_terminated(subreq, -ENOANO, false);
+ return netfs_write_subrequest_terminated(subreq, -ENOANO);
if (!subreq->retry_count) {
set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN);
}
#endif
op = afs_alloc_operation(wreq->netfs_priv, vnode->volume);
if (IS_ERR(op))
- return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN);
afs_op_set_vnode(op, 0, vnode);
op->file[0].dv_delta = 1;
@@ -166,7 +166,7 @@ static void afs_issue_write_worker(struct work_struct *work)
break;
}
- netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false);
+ netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len);
}
void afs_issue_write(struct netfs_io_subrequest *subreq)
@@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st
case NETFS_READ_GAPS:
case NETFS_READ_SINGLE:
case NETFS_READ_FOR_WRITE:
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
return;
default:
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 25cf61ebd40c..0a4b1d433621 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -17,6 +17,7 @@
#include <linux/random.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
@@ -672,8 +673,6 @@ static inline void percpu_memset(void __percpu *p, int c, size_t bytes)
u64 *bch2_acc_percpu_u64s(u64 __percpu *, unsigned);
-#define cmp_int(l, r) ((l > r) - (l < r))
-
static inline int u8_cmp(u8 l, u8 r)
{
return cmp_int(l, r);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 92058ae43488..c08e4a66ac07 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -63,7 +63,7 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret)
ret = -ESTALE;
}
- ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ ki->term_func(ki->term_func_priv, ret);
}
cachefiles_put_kiocb(ki);
@@ -188,7 +188,7 @@ in_progress:
presubmission_error:
if (term_func)
- term_func(term_func_priv, ret < 0 ? ret : skipped, false);
+ term_func(term_func_priv, ret < 0 ? ret : skipped);
return ret;
}
@@ -271,7 +271,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
if (ki->term_func)
- ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ ki->term_func(ki->term_func_priv, ret);
cachefiles_put_kiocb(ki);
}
@@ -301,7 +301,7 @@ int __cachefiles_write(struct cachefiles_object *object,
ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
if (!ki) {
if (term_func)
- term_func(term_func_priv, -ENOMEM, false);
+ term_func(term_func_priv, -ENOMEM);
return -ENOMEM;
}
@@ -366,7 +366,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
{
if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
if (term_func)
- term_func(term_func_priv, -ENOBUFS, false);
+ term_func(term_func_priv, -ENOBUFS);
trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite);
return -ENOBUFS;
}
@@ -665,7 +665,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
pre = CACHEFILES_DIO_BLOCK_SIZE - off;
if (pre >= len) {
fscache_count_dio_misfit();
- netfs_write_subrequest_terminated(subreq, len, false);
+ netfs_write_subrequest_terminated(subreq, len);
return;
}
subreq->transferred += pre;
@@ -691,7 +691,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
len -= post;
if (len == 0) {
fscache_count_dio_misfit();
- netfs_write_subrequest_terminated(subreq, post, false);
+ netfs_write_subrequest_terminated(subreq, post);
return;
}
iov_iter_truncate(&subreq->io_iter, len);
@@ -703,7 +703,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
&start, &len, len, true);
cachefiles_end_secure(cache, saved_cred);
if (ret < 0) {
- netfs_write_subrequest_terminated(subreq, ret, false);
+ netfs_write_subrequest_terminated(subreq, ret);
return;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 29be367905a1..b95c4cb21c13 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -238,6 +238,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
if (sparse && err > 0)
err = ceph_sparse_ext_map_end(op);
if (err < subreq->len &&
+ subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (IS_ENCRYPTED(inode) && err > 0) {
@@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
size_t len;
int mode;
- if (rreq->origin != NETFS_DIO_READ)
+ if (rreq->origin != NETFS_UNBUFFERED_READ &&
+ rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
@@ -539,7 +541,7 @@ static void ceph_set_page_fscache(struct page *page)
folio_start_private_2(page_folio(page)); /* [DEPRECATED] */
}
-static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
+static void ceph_fscache_write_terminated(void *priv, ssize_t error)
{
struct inode *inode = priv;
diff --git a/fs/configfs/Kconfig b/fs/configfs/Kconfig
index 272b64456999..1fcd761fe7be 100644
--- a/fs/configfs/Kconfig
+++ b/fs/configfs/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config CONFIGFS_FS
tristate "Userspace-driven configuration filesystem"
- select SYSFS
help
configfs is a RAM-based filesystem that provides the converse
of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/dax.c b/fs/dax.c
index 676303419e9e..ea0c35794bf9 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry)
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
- xas_pause(xas);
+ xas_reset(xas);
xas_unlock_irq(xas);
schedule();
finish_wait(wq, &ewait.wait);
@@ -1422,8 +1422,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
mm_inc_nr_ptes(vma->vm_mm);
}
- pmd_entry = mk_pmd(&zero_folio->page, vmf->vma->vm_page_prot);
- pmd_entry = pmd_mkhuge(pmd_entry);
+ pmd_entry = folio_mk_pmd(zero_folio, vmf->vma->vm_page_prot);
set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
trace_dax_pmd_load_hole(inode, vmf, zero_folio, *entry);
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 9c9129bca346..34517ca9df91 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -102,8 +102,7 @@ static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
erofs_fscache_req_put(req);
}
-static void erofs_fscache_req_end_io(void *priv,
- ssize_t transferred_or_error, bool was_async)
+static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error)
{
struct erofs_fscache_io *io = priv;
struct erofs_fscache_rq *req = io->private;
@@ -180,8 +179,7 @@ struct erofs_fscache_bio {
struct bio_vec bvecs[BIO_MAX_VECS];
};
-static void erofs_fscache_bio_endio(void *priv,
- ssize_t transferred_or_error, bool was_async)
+static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error)
{
struct erofs_fscache_bio *io = priv;
diff --git a/fs/exec.c b/fs/exec.c
index cfbb2b9ee3c9..1f5fdd2e096e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -78,6 +78,9 @@
#include <trace/events/sched.h>
+/* For vma exec functions. */
+#include "../mm/internal.h"
+
static int bprm_creds_from_file(struct linux_binprm *bprm);
int suid_dumpable = 0;
@@ -182,60 +185,6 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
flush_cache_page(bprm->vma, pos, page_to_pfn(page));
}
-static int __bprm_mm_init(struct linux_binprm *bprm)
-{
- int err;
- struct vm_area_struct *vma = NULL;
- struct mm_struct *mm = bprm->mm;
-
- bprm->vma = vma = vm_area_alloc(mm);
- if (!vma)
- return -ENOMEM;
- vma_set_anonymous(vma);
-
- if (mmap_write_lock_killable(mm)) {
- err = -EINTR;
- goto err_free;
- }
-
- /*
- * Need to be called with mmap write lock
- * held, to avoid race with ksmd.
- */
- err = ksm_execve(mm);
- if (err)
- goto err_ksm;
-
- /*
- * Place the stack at the largest stack address the architecture
- * supports. Later, we'll move this to an appropriate place. We don't
- * use STACK_TOP because that can depend on attributes which aren't
- * configured yet.
- */
- BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
- vma->vm_end = STACK_TOP_MAX;
- vma->vm_start = vma->vm_end - PAGE_SIZE;
- vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
- vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-
- err = insert_vm_struct(mm, vma);
- if (err)
- goto err;
-
- mm->stack_vm = mm->total_vm = 1;
- mmap_write_unlock(mm);
- bprm->p = vma->vm_end - sizeof(void *);
- return 0;
-err:
- ksm_exit(mm);
-err_ksm:
- mmap_write_unlock(mm);
-err_free:
- bprm->vma = NULL;
- vm_area_free(vma);
- return err;
-}
-
static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
return len <= MAX_ARG_STRLEN;
@@ -288,12 +237,6 @@ static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
{
}
-static int __bprm_mm_init(struct linux_binprm *bprm)
-{
- bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
- return 0;
-}
-
static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
return len <= bprm->p;
@@ -322,9 +265,13 @@ static int bprm_mm_init(struct linux_binprm *bprm)
bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
task_unlock(current->group_leader);
- err = __bprm_mm_init(bprm);
+#ifndef CONFIG_MMU
+ bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
+#else
+ err = create_init_stack_vma(bprm->mm, &bprm->vma, &bprm->p);
if (err)
goto err;
+#endif
return 0;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6dcbaa218b7a..e80cd8f2c049 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -23,6 +23,7 @@
#include <linux/swap.h>
#include <linux/splice.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#define CREATE_TRACE_POINTS
#include "fuse_trace.h"
@@ -45,7 +46,7 @@ bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list)
return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
}
-bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
+static bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing)
{
int i;
@@ -816,7 +817,7 @@ static int unlock_request(struct fuse_req *req)
return err;
}
-void fuse_copy_init(struct fuse_copy_state *cs, int write,
+void fuse_copy_init(struct fuse_copy_state *cs, bool write,
struct iov_iter *iter)
{
memset(cs, 0, sizeof(*cs));
@@ -955,10 +956,10 @@ static int fuse_check_folio(struct folio *folio)
* folio that was originally in @pagep will lose a reference and the new
* folio returned in @pagep will carry a reference.
*/
-static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
+static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop)
{
int err;
- struct folio *oldfolio = page_folio(*pagep);
+ struct folio *oldfolio = *foliop;
struct folio *newfolio;
struct pipe_buffer *buf = cs->pipebufs;
@@ -979,7 +980,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
cs->pipebufs++;
cs->nr_segs--;
- if (cs->len != PAGE_SIZE)
+ if (cs->len != folio_size(oldfolio))
goto out_fallback;
if (!pipe_buf_try_steal(cs->pipe, buf))
@@ -1025,7 +1026,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (test_bit(FR_ABORTED, &cs->req->flags))
err = -ENOENT;
else
- *pagep = &newfolio->page;
+ *foliop = newfolio;
spin_unlock(&cs->req->waitq.lock);
if (err) {
@@ -1058,8 +1059,8 @@ out_fallback:
goto out_put_old;
}
-static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
- unsigned offset, unsigned count)
+static int fuse_ref_folio(struct fuse_copy_state *cs, struct folio *folio,
+ unsigned offset, unsigned count)
{
struct pipe_buffer *buf;
int err;
@@ -1067,17 +1068,17 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;
- get_page(page);
+ folio_get(folio);
err = unlock_request(cs->req);
if (err) {
- put_page(page);
+ folio_put(folio);
return err;
}
fuse_copy_finish(cs);
buf = cs->pipebufs;
- buf->page = page;
+ buf->page = &folio->page;
buf->offset = offset;
buf->len = count;
@@ -1089,20 +1090,24 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
}
/*
- * Copy a page in the request to/from the userspace buffer. Must be
+ * Copy a folio in the request to/from the userspace buffer. Must be
* done atomically
*/
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
- unsigned offset, unsigned count, int zeroing)
+static int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop,
+ unsigned offset, unsigned count, int zeroing)
{
int err;
- struct page *page = *pagep;
+ struct folio *folio = *foliop;
+ size_t size;
- if (page && zeroing && count < PAGE_SIZE)
- clear_highpage(page);
+ if (folio) {
+ size = folio_size(folio);
+ if (zeroing && count < size)
+ folio_zero_range(folio, 0, size);
+ }
while (count) {
- if (cs->write && cs->pipebufs && page) {
+ if (cs->write && cs->pipebufs && folio) {
/*
* Can't control lifetime of pipe buffers, so always
* copy user pages.
@@ -1112,12 +1117,12 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
if (err)
return err;
} else {
- return fuse_ref_page(cs, page, offset, count);
+ return fuse_ref_folio(cs, folio, offset, count);
}
} else if (!cs->len) {
- if (cs->move_pages && page &&
- offset == 0 && count == PAGE_SIZE) {
- err = fuse_try_move_page(cs, pagep);
+ if (cs->move_folios && folio &&
+ offset == 0 && count == size) {
+ err = fuse_try_move_folio(cs, foliop);
if (err <= 0)
return err;
} else {
@@ -1126,22 +1131,30 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
return err;
}
}
- if (page) {
- void *mapaddr = kmap_local_page(page);
- void *buf = mapaddr + offset;
- offset += fuse_copy_do(cs, &buf, &count);
+ if (folio) {
+ void *mapaddr = kmap_local_folio(folio, offset);
+ void *buf = mapaddr;
+ unsigned int copy = count;
+ unsigned int bytes_copied;
+
+ if (folio_test_highmem(folio) && count > PAGE_SIZE - offset_in_page(offset))
+ copy = PAGE_SIZE - offset_in_page(offset);
+
+ bytes_copied = fuse_copy_do(cs, &buf, &copy);
kunmap_local(mapaddr);
+ offset += bytes_copied;
+ count -= bytes_copied;
} else
offset += fuse_copy_do(cs, NULL, &count);
}
- if (page && !cs->write)
- flush_dcache_page(page);
+ if (folio && !cs->write)
+ flush_dcache_folio(folio);
return 0;
}
-/* Copy pages in the request to/from userspace buffer */
-static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
- int zeroing)
+/* Copy folios in the request to/from userspace buffer */
+static int fuse_copy_folios(struct fuse_copy_state *cs, unsigned nbytes,
+ int zeroing)
{
unsigned i;
struct fuse_req *req = cs->req;
@@ -1151,23 +1164,12 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
int err;
unsigned int offset = ap->descs[i].offset;
unsigned int count = min(nbytes, ap->descs[i].length);
- struct page *orig, *pagep;
- orig = pagep = &ap->folios[i]->page;
-
- err = fuse_copy_page(cs, &pagep, offset, count, zeroing);
+ err = fuse_copy_folio(cs, &ap->folios[i], offset, count, zeroing);
if (err)
return err;
nbytes -= count;
-
- /*
- * fuse_copy_page may have moved a page from a pipe instead of
- * copying into our given page, so update the folios if it was
- * replaced.
- */
- if (pagep != orig)
- ap->folios[i] = page_folio(pagep);
}
return 0;
}
@@ -1197,7 +1199,7 @@ int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
for (i = 0; !err && i < numargs; i++) {
struct fuse_arg *arg = &args[i];
if (i == numargs - 1 && argpages)
- err = fuse_copy_pages(cs, arg->size, zeroing);
+ err = fuse_copy_folios(cs, arg->size, zeroing);
else
err = fuse_copy_one(cs, arg->value, arg->size);
}
@@ -1538,7 +1540,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
if (!user_backed_iter(to))
return -EINVAL;
- fuse_copy_init(&cs, 1, to);
+ fuse_copy_init(&cs, true, to);
return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
}
@@ -1561,7 +1563,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!bufs)
return -ENOMEM;
- fuse_copy_init(&cs, 1, NULL);
+ fuse_copy_init(&cs, true, NULL);
cs.pipebufs = bufs;
cs.pipe = pipe;
ret = fuse_dev_do_read(fud, in, &cs, len);
@@ -1786,20 +1788,23 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
num = outarg.size;
while (num) {
struct folio *folio;
- struct page *page;
- unsigned int this_num;
+ unsigned int folio_offset;
+ unsigned int nr_bytes;
+ unsigned int nr_pages;
folio = filemap_grab_folio(mapping, index);
err = PTR_ERR(folio);
if (IS_ERR(folio))
goto out_iput;
- page = &folio->page;
- this_num = min_t(unsigned, num, folio_size(folio) - offset);
- err = fuse_copy_page(cs, &page, offset, this_num, 0);
+ folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
+ nr_bytes = min_t(unsigned, num, folio_size(folio) - folio_offset);
+ nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0);
if (!folio_test_uptodate(folio) && !err && offset == 0 &&
- (this_num == folio_size(folio) || file_size == end)) {
- folio_zero_segment(folio, this_num, folio_size(folio));
+ (nr_bytes == folio_size(folio) || file_size == end)) {
+ folio_zero_segment(folio, nr_bytes, folio_size(folio));
folio_mark_uptodate(folio);
}
folio_unlock(folio);
@@ -1808,9 +1813,9 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
if (err)
goto out_iput;
- num -= this_num;
+ num -= nr_bytes;
offset = 0;
- index++;
+ index += nr_pages;
}
err = 0;
@@ -1849,7 +1854,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
- unsigned int num_pages, cur_pages = 0;
+ unsigned int num_pages;
struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
@@ -1867,6 +1872,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, fc->max_pages);
+ num = min(num, num_pages << PAGE_SHIFT);
args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
@@ -1887,25 +1893,29 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
index = outarg->offset >> PAGE_SHIFT;
- while (num && cur_pages < num_pages) {
+ while (num) {
struct folio *folio;
- unsigned int this_num;
+ unsigned int folio_offset;
+ unsigned int nr_bytes;
+ unsigned int nr_pages;
folio = filemap_get_folio(mapping, index);
if (IS_ERR(folio))
break;
- this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+ folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
+ nr_bytes = min(folio_size(folio) - folio_offset, num);
+ nr_pages = (offset + nr_bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
ap->folios[ap->num_folios] = folio;
- ap->descs[ap->num_folios].offset = offset;
- ap->descs[ap->num_folios].length = this_num;
+ ap->descs[ap->num_folios].offset = folio_offset;
+ ap->descs[ap->num_folios].length = nr_bytes;
ap->num_folios++;
- cur_pages++;
offset = 0;
- num -= this_num;
- total_len += this_num;
- index++;
+ num -= nr_bytes;
+ total_len += nr_bytes;
+ index += nr_pages;
}
ra->inarg.offset = outarg->offset;
ra->inarg.size = total_len;
@@ -2021,11 +2031,24 @@ static int fuse_notify_resend(struct fuse_conn *fc)
return 0;
}
+/*
+ * Increments the fuse connection epoch. This will result of dentries from
+ * previous epochs to be invalidated.
+ *
+ * XXX optimization: add call to shrink_dcache_sb()?
+ */
+static int fuse_notify_inc_epoch(struct fuse_conn *fc)
+{
+ atomic_inc(&fc->epoch);
+
+ return 0;
+}
+
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
- /* Don't try to move pages (yet) */
- cs->move_pages = 0;
+ /* Don't try to move folios (yet) */
+ cs->move_folios = false;
switch (code) {
case FUSE_NOTIFY_POLL:
@@ -2049,6 +2072,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_RESEND:
return fuse_notify_resend(fc);
+ case FUSE_NOTIFY_INC_EPOCH:
+ return fuse_notify_inc_epoch(fc);
+
default:
fuse_copy_finish(cs);
return -EINVAL;
@@ -2173,7 +2199,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
spin_unlock(&fpq->lock);
cs->req = req;
if (!req->args->page_replace)
- cs->move_pages = 0;
+ cs->move_folios = false;
if (oh.error)
err = nbytes != sizeof(oh) ? -EINVAL : 0;
@@ -2211,7 +2237,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
if (!user_backed_iter(from))
return -EINVAL;
- fuse_copy_init(&cs, 0, from);
+ fuse_copy_init(&cs, false, from);
return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
}
@@ -2285,13 +2311,13 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
}
pipe_unlock(pipe);
- fuse_copy_init(&cs, 0, NULL);
+ fuse_copy_init(&cs, false, NULL);
cs.pipebufs = bufs;
cs.nr_segs = nbuf;
cs.pipe = pipe;
if (flags & SPLICE_F_MOVE)
- cs.move_pages = 1;
+ cs.move_folios = true;
ret = fuse_dev_do_write(fud, &cs, len);
@@ -2602,6 +2628,17 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
}
}
+#ifdef CONFIG_PROC_FS
+static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file)
+{
+ struct fuse_dev *fud = fuse_get_dev(file);
+ if (!fud)
+ return;
+
+ seq_printf(seq, "fuse_connection:\t%u\n", fud->fc->dev);
+}
+#endif
+
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
.open = fuse_dev_open,
@@ -2617,6 +2654,9 @@ const struct file_operations fuse_dev_operations = {
#ifdef CONFIG_FUSE_IO_URING
.uring_cmd = fuse_uring_cmd,
#endif
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = fuse_dev_show_fdinfo,
+#endif
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index accdce2977c5..249b210becb1 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -140,6 +140,21 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring)
}
}
+static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *list)
+{
+ struct fuse_ring_ent *ent;
+ struct fuse_req *req;
+
+ ent = list_first_entry_or_null(list, struct fuse_ring_ent, list);
+ if (!ent)
+ return false;
+
+ req = ent->fuse_req;
+
+ return time_is_before_jiffies(req->create_time +
+ fc->timeout.req_timeout);
+}
+
bool fuse_uring_request_expired(struct fuse_conn *fc)
{
struct fuse_ring *ring = fc->ring;
@@ -157,7 +172,8 @@ bool fuse_uring_request_expired(struct fuse_conn *fc)
spin_lock(&queue->lock);
if (fuse_request_expired(fc, &queue->fuse_req_queue) ||
fuse_request_expired(fc, &queue->fuse_req_bg_queue) ||
- fuse_fpq_processing_expired(fc, queue->fpq.processing)) {
+ ent_list_request_expired(fc, &queue->ent_w_req_queue) ||
+ ent_list_request_expired(fc, &queue->ent_in_userspace)) {
spin_unlock(&queue->lock);
return true;
}
@@ -494,7 +510,7 @@ static void fuse_uring_cancel(struct io_uring_cmd *cmd,
spin_lock(&queue->lock);
if (ent->state == FRRS_AVAILABLE) {
ent->state = FRRS_USERSPACE;
- list_move(&ent->list, &queue->ent_in_userspace);
+ list_move_tail(&ent->list, &queue->ent_in_userspace);
need_cmd_done = true;
ent->cmd = NULL;
}
@@ -577,8 +593,8 @@ static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
if (err)
return err;
- fuse_copy_init(&cs, 0, &iter);
- cs.is_uring = 1;
+ fuse_copy_init(&cs, false, &iter);
+ cs.is_uring = true;
cs.req = req;
return fuse_copy_out_args(&cs, args, ring_in_out.payload_sz);
@@ -607,8 +623,8 @@ static int fuse_uring_args_to_ring(struct fuse_ring *ring, struct fuse_req *req,
return err;
}
- fuse_copy_init(&cs, 1, &iter);
- cs.is_uring = 1;
+ fuse_copy_init(&cs, true, &iter);
+ cs.is_uring = true;
cs.req = req;
if (num_args > 0) {
@@ -714,7 +730,7 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ent,
cmd = ent->cmd;
ent->cmd = NULL;
ent->state = FRRS_USERSPACE;
- list_move(&ent->list, &queue->ent_in_userspace);
+ list_move_tail(&ent->list, &queue->ent_in_userspace);
spin_unlock(&queue->lock);
io_uring_cmd_done(cmd, 0, 0, issue_flags);
@@ -764,7 +780,7 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
clear_bit(FR_PENDING, &req->flags);
ent->fuse_req = req;
ent->state = FRRS_FUSE_REQ;
- list_move(&ent->list, &queue->ent_w_req_queue);
+ list_move_tail(&ent->list, &queue->ent_w_req_queue);
fuse_uring_add_to_pq(ent, req);
}
@@ -1180,7 +1196,7 @@ static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
spin_lock(&queue->lock);
ent->state = FRRS_USERSPACE;
- list_move(&ent->list, &queue->ent_in_userspace);
+ list_move_tail(&ent->list, &queue->ent_in_userspace);
ent->cmd = NULL;
spin_unlock(&queue->lock);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 7d7ed45cb3e9..45b4c3cc1396 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -200,9 +200,14 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
{
struct inode *inode;
struct fuse_mount *fm;
+ struct fuse_conn *fc;
struct fuse_inode *fi;
int ret;
+ fc = get_fuse_conn_super(dir->i_sb);
+ if (entry->d_time < atomic_read(&fc->epoch))
+ goto invalid;
+
inode = d_inode_rcu(entry);
if (inode && fuse_is_bad(inode))
goto invalid;
@@ -412,16 +417,20 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
unsigned int flags)
{
- int err;
struct fuse_entry_out outarg;
+ struct fuse_conn *fc;
struct inode *inode;
struct dentry *newent;
+ int err, epoch;
bool outarg_valid = true;
bool locked;
if (fuse_is_bad(dir))
return ERR_PTR(-EIO);
+ fc = get_fuse_conn_super(dir->i_sb);
+ epoch = atomic_read(&fc->epoch);
+
locked = fuse_lock_inode(dir);
err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
&outarg, &inode);
@@ -443,6 +452,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
goto out_err;
entry = newent ? newent : entry;
+ entry->d_time = epoch;
if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
@@ -616,7 +626,6 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *entry, struct file *file,
unsigned int flags, umode_t mode, u32 opcode)
{
- int err;
struct inode *inode;
struct fuse_mount *fm = get_fuse_mount(dir);
FUSE_ARGS(args);
@@ -626,11 +635,13 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
struct fuse_entry_out outentry;
struct fuse_inode *fi;
struct fuse_file *ff;
+ int epoch, err;
bool trunc = flags & O_TRUNC;
/* Userspace expects S_IFREG in create mode */
BUG_ON((mode & S_IFMT) != S_IFREG);
+ epoch = atomic_read(&fm->fc->epoch);
forget = fuse_alloc_forget();
err = -ENOMEM;
if (!forget)
@@ -699,6 +710,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
}
kfree(forget);
d_instantiate(entry, inode);
+ entry->d_time = epoch;
fuse_change_entry_timeout(entry, &outentry);
fuse_dir_changed(dir);
err = generic_file_open(inode, file);
@@ -785,12 +797,14 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun
struct fuse_entry_out outarg;
struct inode *inode;
struct dentry *d;
- int err;
struct fuse_forget_link *forget;
+ int epoch, err;
if (fuse_is_bad(dir))
return ERR_PTR(-EIO);
+ epoch = atomic_read(&fm->fc->epoch);
+
forget = fuse_alloc_forget();
if (!forget)
return ERR_PTR(-ENOMEM);
@@ -832,10 +846,13 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun
if (IS_ERR(d))
return d;
- if (d)
+ if (d) {
+ d->d_time = epoch;
fuse_change_entry_timeout(d, &outarg);
- else
+ } else {
+ entry->d_time = epoch;
fuse_change_entry_timeout(entry, &outarg);
+ }
fuse_dir_changed(dir);
return d;
@@ -1609,10 +1626,10 @@ static int fuse_permission(struct mnt_idmap *idmap,
return err;
}
-static int fuse_readlink_page(struct inode *inode, struct folio *folio)
+static int fuse_readlink_folio(struct inode *inode, struct folio *folio)
{
struct fuse_mount *fm = get_fuse_mount(inode);
- struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
+ struct fuse_folio_desc desc = { .length = folio_size(folio) - 1 };
struct fuse_args_pages ap = {
.num_folios = 1,
.folios = &folio,
@@ -1667,7 +1684,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
if (!folio)
goto out_err;
- err = fuse_readlink_page(inode, folio);
+ err = fuse_readlink_folio(inode, folio);
if (err) {
folio_put(folio);
goto out_err;
@@ -1943,6 +1960,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
int err;
bool trust_local_cmtime = is_wb;
bool fault_blocked = false;
+ u64 attr_version;
if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
@@ -2027,6 +2045,8 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
inarg.valid |= FATTR_KILL_SUIDGID;
}
+
+ attr_version = fuse_get_attr_version(fm->fc);
fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
err = fuse_simple_request(fm, &args);
if (err) {
@@ -2052,6 +2072,14 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
/* FIXME: clear I_DIRTY_SYNC? */
}
+ if (fi->attr_version > attr_version) {
+ /*
+ * Apply attributes, for example for fsnotify_change(), but set
+ * attribute timeout to zero.
+ */
+ outarg.attr_valid = outarg.attr_valid_nsec = 0;
+ }
+
fuse_change_attributes_common(inode, &outarg.attr, NULL,
ATTR_TIMEOUT(&outarg),
fuse_get_cache_mask(inode), 0);
@@ -2257,7 +2285,7 @@ void fuse_init_dir(struct inode *inode)
static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
{
- int err = fuse_readlink_page(folio->mapping->host, folio);
+ int err = fuse_readlink_folio(folio->mapping->host, folio);
if (!err)
folio_mark_uptodate(folio);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 754378dd9f71..f102afc03359 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -415,89 +415,11 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
struct fuse_writepage_args {
struct fuse_io_args ia;
- struct rb_node writepages_entry;
struct list_head queue_entry;
- struct fuse_writepage_args *next;
struct inode *inode;
struct fuse_sync_bucket *bucket;
};
-static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
- pgoff_t idx_from, pgoff_t idx_to)
-{
- struct rb_node *n;
-
- n = fi->writepages.rb_node;
-
- while (n) {
- struct fuse_writepage_args *wpa;
- pgoff_t curr_index;
-
- wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
- WARN_ON(get_fuse_inode(wpa->inode) != fi);
- curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
- if (idx_from >= curr_index + wpa->ia.ap.num_folios)
- n = n->rb_right;
- else if (idx_to < curr_index)
- n = n->rb_left;
- else
- return wpa;
- }
- return NULL;
-}
-
-/*
- * Check if any page in a range is under writeback
- */
-static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
- pgoff_t idx_to)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
- bool found;
-
- if (RB_EMPTY_ROOT(&fi->writepages))
- return false;
-
- spin_lock(&fi->lock);
- found = fuse_find_writeback(fi, idx_from, idx_to);
- spin_unlock(&fi->lock);
-
- return found;
-}
-
-static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
-{
- return fuse_range_is_writeback(inode, index, index);
-}
-
-/*
- * Wait for page writeback to be completed.
- *
- * Since fuse doesn't rely on the VM writeback tracking, this has to
- * use some other means.
- */
-static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
-}
-
-static inline bool fuse_folio_is_writeback(struct inode *inode,
- struct folio *folio)
-{
- pgoff_t last = folio_next_index(folio) - 1;
- return fuse_range_is_writeback(inode, folio_index(folio), last);
-}
-
-static void fuse_wait_on_folio_writeback(struct inode *inode,
- struct folio *folio)
-{
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- wait_event(fi->page_waitq, !fuse_folio_is_writeback(inode, folio));
-}
-
/*
* Wait for all pending writepages on the inode to finish.
*
@@ -532,10 +454,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (err)
return err;
- inode_lock(inode);
- fuse_sync_writes(inode);
- inode_unlock(inode);
-
err = filemap_check_errors(file->f_mapping);
if (err)
return err;
@@ -875,7 +793,7 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio)
struct inode *inode = folio->mapping->host;
struct fuse_mount *fm = get_fuse_mount(inode);
loff_t pos = folio_pos(folio);
- struct fuse_folio_desc desc = { .length = PAGE_SIZE };
+ struct fuse_folio_desc desc = { .length = folio_size(folio) };
struct fuse_io_args ia = {
.ap.args.page_zeroing = true,
.ap.args.out_pages = true,
@@ -886,13 +804,6 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio)
ssize_t res;
u64 attr_ver;
- /*
- * With the temporary pages that are used to complete writeback, we can
- * have writeback that extends beyond the lifetime of the folio. So
- * make sure we read a properly synced folio.
- */
- fuse_wait_on_folio_writeback(inode, folio);
-
attr_ver = fuse_get_attr_version(fm->fc);
/* Don't overflow end offset */
@@ -965,14 +876,13 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
fuse_io_free(ia);
}
-static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
+static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file,
+ unsigned int count)
{
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
struct fuse_args_pages *ap = &ia->ap;
loff_t pos = folio_pos(ap->folios[0]);
- /* Currently, all folios in FUSE are one page */
- size_t count = ap->num_folios << PAGE_SHIFT;
ssize_t res;
int err;
@@ -1005,17 +915,13 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
static void fuse_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->mapping->host;
- struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
unsigned int max_pages, nr_pages;
- pgoff_t first = readahead_index(rac);
- pgoff_t last = first + readahead_count(rac) - 1;
+ struct folio *folio = NULL;
if (fuse_is_bad(inode))
return;
- wait_event(fi->page_waitq, !fuse_range_is_writeback(inode, first, last));
-
max_pages = min_t(unsigned int, fc->max_pages,
fc->max_read / PAGE_SIZE);
@@ -1033,8 +939,8 @@ static void fuse_readahead(struct readahead_control *rac)
while (nr_pages) {
struct fuse_io_args *ia;
struct fuse_args_pages *ap;
- struct folio *folio;
unsigned cur_pages = min(max_pages, nr_pages);
+ unsigned int pages = 0;
if (fc->num_background >= fc->congestion_threshold &&
rac->ra->async_size >= readahead_count(rac))
@@ -1046,10 +952,12 @@ static void fuse_readahead(struct readahead_control *rac)
ia = fuse_io_alloc(NULL, cur_pages);
if (!ia)
- return;
+ break;
ap = &ia->ap;
- while (ap->num_folios < cur_pages) {
+ while (pages < cur_pages) {
+ unsigned int folio_pages;
+
/*
* This returns a folio with a ref held on it.
* The ref needs to be held until the request is
@@ -1057,13 +965,31 @@ static void fuse_readahead(struct readahead_control *rac)
* fuse_try_move_page()) drops the ref after it's
* replaced in the page cache.
*/
- folio = __readahead_folio(rac);
+ if (!folio)
+ folio = __readahead_folio(rac);
+
+ folio_pages = folio_nr_pages(folio);
+ if (folio_pages > cur_pages - pages) {
+ /*
+ * Large folios belonging to fuse will never
+ * have more pages than max_pages.
+ */
+ WARN_ON(!pages);
+ break;
+ }
+
ap->folios[ap->num_folios] = folio;
ap->descs[ap->num_folios].length = folio_size(folio);
ap->num_folios++;
+ pages += folio_pages;
+ folio = NULL;
}
- fuse_send_readpages(ia, rac->file);
- nr_pages -= cur_pages;
+ fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT);
+ nr_pages -= pages;
+ }
+ if (folio) {
+ folio_end_read(folio, false);
+ folio_put(folio);
}
}
@@ -1181,7 +1107,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
int err;
for (i = 0; i < ap->num_folios; i++)
- fuse_wait_on_folio_writeback(inode, ap->folios[i]);
+ folio_wait_writeback(ap->folios[i]);
fuse_write_args_fill(ia, ff, pos, count);
ia->write.in.flags = fuse_write_flags(iocb);
@@ -1226,27 +1152,24 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
- unsigned int nr_pages = 0;
size_t count = 0;
- int err;
+ unsigned int num;
+ int err = 0;
+
+ num = min(iov_iter_count(ii), fc->max_write);
+ num = min(num, max_pages << PAGE_SHIFT);
ap->args.in_pages = true;
ap->descs[0].offset = offset;
- do {
+ while (num) {
size_t tmp;
struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;
- size_t bytes = min_t(size_t, PAGE_SIZE - offset,
- iov_iter_count(ii));
-
- bytes = min_t(size_t, bytes, fc->max_write - count);
+ unsigned int bytes;
+ unsigned int folio_offset;
again:
- err = -EFAULT;
- if (fault_in_iov_iter_readable(ii, bytes))
- break;
-
folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
mapping_gfp_mask(mapping));
if (IS_ERR(folio)) {
@@ -1257,29 +1180,42 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
if (mapping_writably_mapped(mapping))
flush_dcache_folio(folio);
- tmp = copy_folio_from_iter_atomic(folio, offset, bytes, ii);
+ folio_offset = ((index - folio->index) << PAGE_SHIFT) + offset;
+ bytes = min(folio_size(folio) - folio_offset, num);
+
+ tmp = copy_folio_from_iter_atomic(folio, folio_offset, bytes, ii);
flush_dcache_folio(folio);
if (!tmp) {
folio_unlock(folio);
folio_put(folio);
+
+ /*
+ * Ensure forward progress by faulting in
+ * while not holding the folio lock:
+ */
+ if (fault_in_iov_iter_readable(ii, bytes)) {
+ err = -EFAULT;
+ break;
+ }
+
goto again;
}
- err = 0;
ap->folios[ap->num_folios] = folio;
+ ap->descs[ap->num_folios].offset = folio_offset;
ap->descs[ap->num_folios].length = tmp;
ap->num_folios++;
- nr_pages++;
count += tmp;
pos += tmp;
+ num -= tmp;
offset += tmp;
- if (offset == PAGE_SIZE)
+ if (offset == folio_size(folio))
offset = 0;
- /* If we copied full page, mark it uptodate */
- if (tmp == PAGE_SIZE)
+ /* If we copied full folio, mark it uptodate */
+ if (tmp == folio_size(folio))
folio_mark_uptodate(folio);
if (folio_test_uptodate(folio)) {
@@ -1288,10 +1224,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
ia->write.folio_locked = true;
break;
}
- if (!fc->big_writes)
+ if (!fc->big_writes || offset != 0)
break;
- } while (iov_iter_count(ii) && count < fc->max_write &&
- nr_pages < max_pages && offset == 0);
+ }
return count > 0 ? count : err;
}
@@ -1638,7 +1573,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
return res;
}
}
- if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
+ if (!cuse && filemap_range_has_writeback(mapping, pos, (pos + count - 1))) {
if (!write)
inode_lock(inode);
fuse_sync_writes(inode);
@@ -1835,38 +1770,34 @@ static ssize_t fuse_splice_write(struct pipe_inode_info *pipe, struct file *out,
static void fuse_writepage_free(struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
- int i;
if (wpa->bucket)
fuse_sync_bucket_dec(wpa->bucket);
- for (i = 0; i < ap->num_folios; i++)
- folio_put(ap->folios[i]);
-
fuse_file_put(wpa->ia.ff, false);
kfree(ap->folios);
kfree(wpa);
}
-static void fuse_writepage_finish_stat(struct inode *inode, struct folio *folio)
-{
- struct backing_dev_info *bdi = inode_to_bdi(inode);
-
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- node_stat_sub_folio(folio, NR_WRITEBACK_TEMP);
- wb_writeout_inc(&bdi->wb);
-}
-
static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
- for (i = 0; i < ap->num_folios; i++)
- fuse_writepage_finish_stat(inode, ap->folios[i]);
+ for (i = 0; i < ap->num_folios; i++) {
+ /*
+ * Benchmarks showed that ending writeback within the
+ * scope of the fi->lock alleviates xarray lock
+ * contention and noticeably improves performance.
+ */
+ folio_end_writeback(ap->folios[i]);
+ dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+ wb_writeout_inc(&bdi->wb);
+ }
wake_up(&fi->page_waitq);
}
@@ -1877,13 +1808,15 @@ static void fuse_send_writepage(struct fuse_mount *fm,
__releases(fi->lock)
__acquires(fi->lock)
{
- struct fuse_writepage_args *aux, *next;
struct fuse_inode *fi = get_fuse_inode(wpa->inode);
+ struct fuse_args_pages *ap = &wpa->ia.ap;
struct fuse_write_in *inarg = &wpa->ia.write.in;
- struct fuse_args *args = &wpa->ia.ap.args;
- /* Currently, all folios in FUSE are one page */
- __u64 data_size = wpa->ia.ap.num_folios * PAGE_SIZE;
- int err;
+ struct fuse_args *args = &ap->args;
+ __u64 data_size = 0;
+ int err, i;
+
+ for (i = 0; i < ap->num_folios; i++)
+ data_size += ap->descs[i].length;
fi->writectr++;
if (inarg->offset + data_size <= size) {
@@ -1914,19 +1847,8 @@ __acquires(fi->lock)
out_free:
fi->writectr--;
- rb_erase(&wpa->writepages_entry, &fi->writepages);
fuse_writepage_finish(wpa);
spin_unlock(&fi->lock);
-
- /* After rb_erase() aux request list is private */
- for (aux = wpa->next; aux; aux = next) {
- next = aux->next;
- aux->next = NULL;
- fuse_writepage_finish_stat(aux->inode,
- aux->ia.ap.folios[0]);
- fuse_writepage_free(aux);
- }
-
fuse_writepage_free(wpa);
spin_lock(&fi->lock);
}
@@ -1954,43 +1876,6 @@ __acquires(fi->lock)
}
}
-static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root,
- struct fuse_writepage_args *wpa)
-{
- pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
- pgoff_t idx_to = idx_from + wpa->ia.ap.num_folios - 1;
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
-
- WARN_ON(!wpa->ia.ap.num_folios);
- while (*p) {
- struct fuse_writepage_args *curr;
- pgoff_t curr_index;
-
- parent = *p;
- curr = rb_entry(parent, struct fuse_writepage_args,
- writepages_entry);
- WARN_ON(curr->inode != wpa->inode);
- curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
-
- if (idx_from >= curr_index + curr->ia.ap.num_folios)
- p = &(*p)->rb_right;
- else if (idx_to < curr_index)
- p = &(*p)->rb_left;
- else
- return curr;
- }
-
- rb_link_node(&wpa->writepages_entry, parent, p);
- rb_insert_color(&wpa->writepages_entry, root);
- return NULL;
-}
-
-static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
-{
- WARN_ON(fuse_insert_writeback(root, wpa));
-}
-
static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
int error)
{
@@ -2010,41 +1895,6 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
if (!fc->writeback_cache)
fuse_invalidate_attr_mask(inode, FUSE_STATX_MODIFY);
spin_lock(&fi->lock);
- rb_erase(&wpa->writepages_entry, &fi->writepages);
- while (wpa->next) {
- struct fuse_mount *fm = get_fuse_mount(inode);
- struct fuse_write_in *inarg = &wpa->ia.write.in;
- struct fuse_writepage_args *next = wpa->next;
-
- wpa->next = next->next;
- next->next = NULL;
- tree_insert(&fi->writepages, next);
-
- /*
- * Skip fuse_flush_writepages() to make it easy to crop requests
- * based on primary request size.
- *
- * 1st case (trivial): there are no concurrent activities using
- * fuse_set/release_nowrite. Then we're on safe side because
- * fuse_flush_writepages() would call fuse_send_writepage()
- * anyway.
- *
- * 2nd case: someone called fuse_set_nowrite and it is waiting
- * now for completion of all in-flight requests. This happens
- * rarely and no more than once per page, so this should be
- * okay.
- *
- * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
- * of fuse_set_nowrite..fuse_release_nowrite section. The fact
- * that fuse_set_nowrite returned implies that all in-flight
- * requests were completed along with all of their secondary
- * requests. Further primary requests are blocked by negative
- * writectr. Hence there cannot be any in-flight requests and
- * no invocations of fuse_writepage_end() while we're in
- * fuse_set_nowrite..fuse_release_nowrite section.
- */
- fuse_send_writepage(fm, next, inarg->offset + inarg->size);
- }
fi->writectr--;
fuse_writepage_finish(wpa);
spin_unlock(&fi->lock);
@@ -2131,19 +1981,16 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
}
static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
- struct folio *tmp_folio, uint32_t folio_index)
+ uint32_t folio_index)
{
struct inode *inode = folio->mapping->host;
struct fuse_args_pages *ap = &wpa->ia.ap;
- folio_copy(tmp_folio, folio);
-
- ap->folios[folio_index] = tmp_folio;
+ ap->folios[folio_index] = folio;
ap->descs[folio_index].offset = 0;
- ap->descs[folio_index].length = PAGE_SIZE;
+ ap->descs[folio_index].length = folio_size(folio);
inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
}
static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
@@ -2178,18 +2025,12 @@ static int fuse_writepage_locked(struct folio *folio)
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_writepage_args *wpa;
struct fuse_args_pages *ap;
- struct folio *tmp_folio;
struct fuse_file *ff;
- int error = -ENOMEM;
+ int error = -EIO;
- tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
- if (!tmp_folio)
- goto err;
-
- error = -EIO;
ff = fuse_write_file_get(fi);
if (!ff)
- goto err_nofile;
+ goto err;
wpa = fuse_writepage_args_setup(folio, ff);
error = -ENOMEM;
@@ -2200,22 +2041,17 @@ static int fuse_writepage_locked(struct folio *folio)
ap->num_folios = 1;
folio_start_writeback(folio);
- fuse_writepage_args_page_fill(wpa, folio, tmp_folio, 0);
+ fuse_writepage_args_page_fill(wpa, folio, 0);
spin_lock(&fi->lock);
- tree_insert(&fi->writepages, wpa);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
- folio_end_writeback(folio);
-
return 0;
err_writepage_args:
fuse_file_put(ff, false);
-err_nofile:
- folio_put(tmp_folio);
err:
mapping_set_error(folio->mapping, error);
return error;
@@ -2225,8 +2061,8 @@ struct fuse_fill_wb_data {
struct fuse_writepage_args *wpa;
struct fuse_file *ff;
struct inode *inode;
- struct folio **orig_folios;
unsigned int max_folios;
+ unsigned int nr_pages;
};
static bool fuse_pages_realloc(struct fuse_fill_wb_data *data)
@@ -2260,69 +2096,11 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
struct fuse_writepage_args *wpa = data->wpa;
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
- int num_folios = wpa->ia.ap.num_folios;
- int i;
spin_lock(&fi->lock);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
-
- for (i = 0; i < num_folios; i++)
- folio_end_writeback(data->orig_folios[i]);
-}
-
-/*
- * Check under fi->lock if the page is under writeback, and insert it onto the
- * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's
- * one already added for a page at this offset. If there's none, then insert
- * this new request onto the auxiliary list, otherwise reuse the existing one by
- * swapping the new temp page with the old one.
- */
-static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
- struct folio *folio)
-{
- struct fuse_inode *fi = get_fuse_inode(new_wpa->inode);
- struct fuse_writepage_args *tmp;
- struct fuse_writepage_args *old_wpa;
- struct fuse_args_pages *new_ap = &new_wpa->ia.ap;
-
- WARN_ON(new_ap->num_folios != 0);
- new_ap->num_folios = 1;
-
- spin_lock(&fi->lock);
- old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa);
- if (!old_wpa) {
- spin_unlock(&fi->lock);
- return true;
- }
-
- for (tmp = old_wpa->next; tmp; tmp = tmp->next) {
- pgoff_t curr_index;
-
- WARN_ON(tmp->inode != new_wpa->inode);
- curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT;
- if (curr_index == folio->index) {
- WARN_ON(tmp->ia.ap.num_folios != 1);
- swap(tmp->ia.ap.folios[0], new_ap->folios[0]);
- break;
- }
- }
-
- if (!tmp) {
- new_wpa->next = old_wpa->next;
- old_wpa->next = new_wpa;
- }
-
- spin_unlock(&fi->lock);
-
- if (tmp) {
- fuse_writepage_finish_stat(new_wpa->inode,
- folio);
- fuse_writepage_free(new_wpa);
- }
-
- return false;
}
static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
@@ -2331,25 +2109,16 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct folio *folio,
{
WARN_ON(!ap->num_folios);
- /*
- * Being under writeback is unlikely but possible. For example direct
- * read to an mmaped fuse file will set the page dirty twice; once when
- * the pages are faulted with get_user_pages(), and then after the read
- * completed.
- */
- if (fuse_folio_is_writeback(data->inode, folio))
- return true;
-
/* Reached max pages */
- if (ap->num_folios == fc->max_pages)
+ if (data->nr_pages + folio_nr_pages(folio) > fc->max_pages)
return true;
/* Reached max write bytes */
- if ((ap->num_folios + 1) * PAGE_SIZE > fc->max_write)
+ if ((data->nr_pages * PAGE_SIZE) + folio_size(folio) > fc->max_write)
return true;
/* Discontinuity */
- if (data->orig_folios[ap->num_folios - 1]->index + 1 != folio_index(folio))
+ if (folio_next_index(ap->folios[ap->num_folios - 1]) != folio->index)
return true;
/* Need to grow the pages array? If so, did the expansion fail? */
@@ -2368,7 +2137,6 @@ static int fuse_writepages_fill(struct folio *folio,
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
- struct folio *tmp_folio;
int err;
if (!data->ff) {
@@ -2381,56 +2149,27 @@ static int fuse_writepages_fill(struct folio *folio,
if (wpa && fuse_writepage_need_send(fc, folio, ap, data)) {
fuse_writepages_send(data);
data->wpa = NULL;
+ data->nr_pages = 0;
}
- err = -ENOMEM;
- tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
- if (!tmp_folio)
- goto out_unlock;
-
- /*
- * The page must not be redirtied until the writeout is completed
- * (i.e. userspace has sent a reply to the write request). Otherwise
- * there could be more than one temporary page instance for each real
- * page.
- *
- * This is ensured by holding the page lock in page_mkwrite() while
- * checking fuse_page_is_writeback(). We already hold the page lock
- * since clear_page_dirty_for_io() and keep it held until we add the
- * request to the fi->writepages list and increment ap->num_folios.
- * After this fuse_page_is_writeback() will indicate that the page is
- * under writeback, so we can release the page lock.
- */
if (data->wpa == NULL) {
err = -ENOMEM;
wpa = fuse_writepage_args_setup(folio, data->ff);
- if (!wpa) {
- folio_put(tmp_folio);
+ if (!wpa)
goto out_unlock;
- }
fuse_file_get(wpa->ia.ff);
data->max_folios = 1;
ap = &wpa->ia.ap;
}
folio_start_writeback(folio);
- fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_folios);
- data->orig_folios[ap->num_folios] = folio;
+ fuse_writepage_args_page_fill(wpa, folio, ap->num_folios);
+ data->nr_pages += folio_nr_pages(folio);
err = 0;
- if (data->wpa) {
- /*
- * Protected by fi->lock against concurrent access by
- * fuse_page_is_writeback().
- */
- spin_lock(&fi->lock);
- ap->num_folios++;
- spin_unlock(&fi->lock);
- } else if (fuse_writepage_add(wpa, folio)) {
+ ap->num_folios++;
+ if (!data->wpa)
data->wpa = wpa;
- } else {
- folio_end_writeback(folio);
- }
out_unlock:
folio_unlock(folio);
@@ -2456,13 +2195,7 @@ static int fuse_writepages(struct address_space *mapping,
data.inode = inode;
data.wpa = NULL;
data.ff = NULL;
-
- err = -ENOMEM;
- data.orig_folios = kcalloc(fc->max_pages,
- sizeof(struct folio *),
- GFP_NOFS);
- if (!data.orig_folios)
- goto out;
+ data.nr_pages = 0;
err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
if (data.wpa) {
@@ -2472,7 +2205,6 @@ static int fuse_writepages(struct address_space *mapping,
if (data.ff)
fuse_file_put(data.ff, false);
- kfree(data.orig_folios);
out:
return err;
}
@@ -2497,8 +2229,6 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
if (IS_ERR(folio))
goto error;
- fuse_wait_on_page_writeback(mapping->host, folio->index);
-
if (folio_test_uptodate(folio) || len >= folio_size(folio))
goto success;
/*
@@ -2561,13 +2291,9 @@ static int fuse_launder_folio(struct folio *folio)
{
int err = 0;
if (folio_clear_dirty_for_io(folio)) {
- struct inode *inode = folio->mapping->host;
-
- /* Serialize with pending writeback for the same page */
- fuse_wait_on_page_writeback(inode, folio->index);
err = fuse_writepage_locked(folio);
if (!err)
- fuse_wait_on_page_writeback(inode, folio->index);
+ folio_wait_writeback(folio);
}
return err;
}
@@ -2611,7 +2337,7 @@ static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
- fuse_wait_on_folio_writeback(inode, folio);
+ folio_wait_writeback(folio);
return VM_FAULT_LOCKED;
}
@@ -3429,9 +3155,12 @@ static const struct address_space_operations fuse_file_aops = {
void fuse_init_file_inode(struct inode *inode, unsigned int flags)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops;
+ if (fc->writeback_cache)
+ mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
@@ -3439,7 +3168,6 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
fi->iocachectr = 0;
init_waitqueue_head(&fi->page_waitq);
init_waitqueue_head(&fi->direct_io_waitq);
- fi->writepages = RB_ROOT;
if (IS_ENABLED(CONFIG_FUSE_DAX))
fuse_dax_inode_init(inode, flags);
diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
index b3c2e32254ba..5a9bd771a319 100644
--- a/fs/fuse/fuse_dev_i.h
+++ b/fs/fuse/fuse_dev_i.h
@@ -20,7 +20,6 @@ struct fuse_iqueue;
struct fuse_forget_link;
struct fuse_copy_state {
- int write;
struct fuse_req *req;
struct iov_iter *iter;
struct pipe_buffer *pipebufs;
@@ -30,8 +29,9 @@ struct fuse_copy_state {
struct page *pg;
unsigned int len;
unsigned int offset;
- unsigned int move_pages:1;
- unsigned int is_uring:1;
+ bool write:1;
+ bool move_folios:1;
+ bool is_uring:1;
struct {
unsigned int copied_sz; /* copied size into the user buffer */
} ring;
@@ -51,7 +51,7 @@ struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique);
void fuse_dev_end_requests(struct list_head *head);
-void fuse_copy_init(struct fuse_copy_state *cs, int write,
+void fuse_copy_init(struct fuse_copy_state *cs, bool write,
struct iov_iter *iter);
int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs,
unsigned int argpages, struct fuse_arg *args,
@@ -64,7 +64,6 @@ void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req);
bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock);
bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list);
-bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing);
#endif
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d56d4fd956db..b54f4f57789f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -74,8 +74,8 @@ extern struct list_head fuse_conn_list;
extern struct mutex fuse_mutex;
/** Module parameters */
-extern unsigned max_user_bgreq;
-extern unsigned max_user_congthresh;
+extern unsigned int max_user_bgreq;
+extern unsigned int max_user_congthresh;
/* One forget request */
struct fuse_forget_link {
@@ -161,9 +161,6 @@ struct fuse_inode {
/* waitq for direct-io completion */
wait_queue_head_t direct_io_waitq;
-
- /* List of writepage requestst (pending or sent) */
- struct rb_root writepages;
};
/* readdir cache (directory only) */
@@ -636,6 +633,9 @@ struct fuse_conn {
/** Number of fuse_dev's */
atomic_t dev_count;
+ /** Current epoch for up-to-date dentries */
+ atomic_t epoch;
+
struct rcu_head rcu;
/** The user id for this mount */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index fd48e8d37f2e..bfe8d8af46f3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -41,7 +41,7 @@ unsigned int fuse_max_pages_limit = 256;
unsigned int fuse_default_req_timeout;
unsigned int fuse_max_req_timeout;
-unsigned max_user_bgreq;
+unsigned int max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
&max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
@@ -49,7 +49,7 @@ MODULE_PARM_DESC(max_user_bgreq,
"Global limit for the maximum number of backgrounded requests an "
"unprivileged user can set");
-unsigned max_user_congthresh;
+unsigned int max_user_congthresh;
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
&max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
@@ -962,6 +962,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
init_rwsem(&fc->killsb);
refcount_set(&fc->count, 1);
atomic_set(&fc->dev_count, 1);
+ atomic_set(&fc->epoch, 1);
init_waitqueue_head(&fc->blocked_waitq);
fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
INIT_LIST_HEAD(&fc->bg_queue);
@@ -1036,7 +1037,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
}
EXPORT_SYMBOL_GPL(fuse_conn_get);
-static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
+static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mode)
{
struct fuse_attr attr;
memset(&attr, 0, sizeof(attr));
@@ -1211,7 +1212,7 @@ static const struct super_operations fuse_super_operations = {
.show_options = fuse_show_options,
};
-static void sanitize_global_limit(unsigned *limit)
+static void sanitize_global_limit(unsigned int *limit)
{
/*
* The default maximum number of async requests is calculated to consume
@@ -1232,7 +1233,7 @@ static int set_global_limit(const char *val, const struct kernel_param *kp)
if (rv)
return rv;
- sanitize_global_limit((unsigned *)kp->arg);
+ sanitize_global_limit((unsigned int *)kp->arg);
return 0;
}
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index edcd6f18a8a8..c2aae2eef086 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -161,6 +161,7 @@ static int fuse_direntplus_link(struct file *file,
struct fuse_conn *fc;
struct inode *inode;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ int epoch;
if (!o->nodeid) {
/*
@@ -190,6 +191,7 @@ static int fuse_direntplus_link(struct file *file,
return -EIO;
fc = get_fuse_conn(dir);
+ epoch = atomic_read(&fc->epoch);
name.hash = full_name_hash(parent, name.name, name.len);
dentry = d_lookup(parent, &name);
@@ -256,6 +258,7 @@ retry:
}
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
+ dentry->d_time = epoch;
fuse_change_entry_timeout(dentry, o);
dput(dentry);
@@ -332,35 +335,32 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
int plus;
ssize_t res;
- struct folio *folio;
struct inode *inode = file_inode(file);
struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_conn *fc = fm->fc;
struct fuse_io_args ia = {};
- struct fuse_args_pages *ap = &ia.ap;
- struct fuse_folio_desc desc = { .length = PAGE_SIZE };
+ struct fuse_args *args = &ia.ap.args;
+ void *buf;
+ size_t bufsize = clamp((unsigned int) ctx->count, PAGE_SIZE, fc->max_pages << PAGE_SHIFT);
u64 attr_version = 0, evict_ctr = 0;
bool locked;
- folio = folio_alloc(GFP_KERNEL, 0);
- if (!folio)
+ buf = kvmalloc(bufsize, GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
+ args->out_args[0].value = buf;
+
plus = fuse_use_readdirplus(inode, ctx);
- ap->args.out_pages = true;
- ap->num_folios = 1;
- ap->folios = &folio;
- ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fm->fc);
evict_ctr = fuse_get_evict_ctr(fm->fc);
- fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
- FUSE_READDIRPLUS);
+ fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIRPLUS);
} else {
- fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
- FUSE_READDIR);
+ fuse_read_args_fill(&ia, file, ctx->pos, bufsize, FUSE_READDIR);
}
locked = fuse_lock_inode(inode);
- res = fuse_simple_request(fm, &ap->args);
+ res = fuse_simple_request(fm, args);
fuse_unlock_inode(inode, locked);
if (res >= 0) {
if (!res) {
@@ -369,16 +369,14 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
- res = parse_dirplusfile(folio_address(folio), res,
- file, ctx, attr_version,
+ res = parse_dirplusfile(buf, res, file, ctx, attr_version,
evict_ctr);
} else {
- res = parse_dirfile(folio_address(folio), res, file,
- ctx);
+ res = parse_dirfile(buf, res, file, ctx);
}
}
- folio_put(folio);
+ kvfree(buf);
fuse_invalidate_atime(inode);
return res;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 653f0ff4b057..85c491fcf1a3 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -64,7 +64,10 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
void free_sbd(struct gfs2_sbd *sdp)
{
+ struct super_block *sb = sdp->sd_vfs;
+
free_percpu(sdp->sd_lkstats);
+ sb->s_fs_info = NULL;
kfree(sdp);
}
@@ -1314,7 +1317,6 @@ fail_iput:
iput(sdp->sd_inode);
fail_free:
free_sbd(sdp);
- sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748125653d6c..c3c8842920d2 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -764,7 +764,6 @@ fail_reg:
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
wait_for_completion(&sdp->sd_kobj_unregister);
- sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 233abf598f65..3729391a18f3 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1691,6 +1691,8 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
if (wpc->iomap.flags & IOMAP_F_SHARED)
ioend_flags |= IOMAP_IOEND_SHARED;
+ if (folio_test_dropbehind(folio))
+ ioend_flags |= IOMAP_IOEND_DONTCACHE;
if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
ioend_flags |= IOMAP_IOEND_BOUNDARY;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index df575a873ec6..9029cd216912 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -15,6 +15,7 @@
#include <linux/mempool.h>
#include <linux/seq_file.h>
#include <linux/writeback.h>
+#include <linux/migrate.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
@@ -151,7 +152,59 @@ static inline void dec_io(struct folio *folio, blk_status_t status,
handler(folio, anchor->status);
}
+#ifdef CONFIG_MIGRATION
+static int __metapage_migrate_folio(struct address_space *mapping,
+ struct folio *dst, struct folio *src,
+ enum migrate_mode mode)
+{
+ struct meta_anchor *src_anchor = src->private;
+ struct metapage *mps[MPS_PER_PAGE] = {0};
+ struct metapage *mp;
+ int i, rc;
+
+ for (i = 0; i < MPS_PER_PAGE; i++) {
+ mp = src_anchor->mp[i];
+ if (mp && metapage_locked(mp))
+ return -EAGAIN;
+ }
+
+ rc = filemap_migrate_folio(mapping, dst, src, mode);
+ if (rc != MIGRATEPAGE_SUCCESS)
+ return rc;
+
+ for (i = 0; i < MPS_PER_PAGE; i++) {
+ mp = src_anchor->mp[i];
+ if (!mp)
+ continue;
+ if (unlikely(insert_metapage(dst, mp))) {
+ /* If error, roll-back previosly inserted pages */
+ for (int j = 0 ; j < i; j++) {
+ if (mps[j])
+ remove_metapage(dst, mps[j]);
+ }
+ return -EAGAIN;
+ }
+ mps[i] = mp;
+ }
+
+ /* Update the metapage and remove it from src */
+ for (i = 0; i < MPS_PER_PAGE; i++) {
+ mp = mps[i];
+ if (mp) {
+ int page_offset = mp->data - folio_address(src);
+
+ mp->data = folio_address(dst) + page_offset;
+ mp->folio = dst;
+ remove_metapage(src, mp);
+ }
+ }
+
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif /* CONFIG_MIGRATION */
+
#else
+
static inline struct metapage *folio_to_mp(struct folio *folio, int offset)
{
return folio->private;
@@ -175,6 +228,35 @@ static inline void remove_metapage(struct folio *folio, struct metapage *mp)
#define inc_io(folio) do {} while(0)
#define dec_io(folio, status, handler) handler(folio, status)
+#ifdef CONFIG_MIGRATION
+static int __metapage_migrate_folio(struct address_space *mapping,
+ struct folio *dst, struct folio *src,
+ enum migrate_mode mode)
+{
+ struct metapage *mp;
+ int page_offset;
+ int rc;
+
+ mp = folio_to_mp(src, 0);
+ if (metapage_locked(mp))
+ return -EAGAIN;
+
+ rc = filemap_migrate_folio(mapping, dst, src, mode);
+ if (rc != MIGRATEPAGE_SUCCESS)
+ return rc;
+
+ if (unlikely(insert_metapage(dst, mp)))
+ return -EAGAIN;
+
+ page_offset = mp->data - folio_address(src);
+ mp->data = folio_address(dst) + page_offset;
+ mp->folio = dst;
+ remove_metapage(src, mp);
+
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif /* CONFIG_MIGRATION */
+
#endif
static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
@@ -554,6 +636,29 @@ static bool metapage_release_folio(struct folio *folio, gfp_t gfp_mask)
return ret;
}
+#ifdef CONFIG_MIGRATION
+/*
+ * metapage_migrate_folio - Migration function for JFS metapages
+ */
+static int metapage_migrate_folio(struct address_space *mapping,
+ struct folio *dst, struct folio *src,
+ enum migrate_mode mode)
+{
+ int expected_count;
+
+ if (!src->private)
+ return filemap_migrate_folio(mapping, dst, src, mode);
+
+ /* Check whether page does not have extra refs before we do more work */
+ expected_count = folio_expected_ref_count(src) + 1;
+ if (folio_ref_count(src) != expected_count)
+ return -EAGAIN;
+ return __metapage_migrate_folio(mapping, dst, src, mode);
+}
+#else
+#define metapage_migrate_folio NULL
+#endif /* CONFIG_MIGRATION */
+
static void metapage_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
@@ -570,6 +675,7 @@ const struct address_space_operations jfs_metapage_aops = {
.release_folio = metapage_release_folio,
.invalidate_folio = metapage_invalidate_folio,
.dirty_folio = filemap_dirty_folio,
+ .migrate_folio = metapage_migrate_folio,
};
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 0d1b6d35ff3b..18b3dc74c70e 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -78,7 +78,8 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
* [!] NOTE: This must be run in the same thread as ->issue_read() was called
* in as we access the readahead_control struct.
*/
-static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
+static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,
+ struct readahead_control *ractl)
{
struct netfs_io_request *rreq = subreq->rreq;
size_t rsize = subreq->len;
@@ -86,7 +87,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);
- if (rreq->ractl) {
+ if (ractl) {
/* If we don't have sufficient folios in the rolling buffer,
* extract a folioq's worth from the readahead region at a time
* into the buffer. Note that this acquires a ref on each page
@@ -99,7 +100,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
while (rreq->submitted < subreq->start + rsize) {
ssize_t added;
- added = rolling_buffer_load_from_ra(&rreq->buffer, rreq->ractl,
+ added = rolling_buffer_load_from_ra(&rreq->buffer, ractl,
&put_batch);
if (added < 0)
return added;
@@ -211,7 +212,8 @@ static void netfs_issue_read(struct netfs_io_request *rreq,
* slicing up the region to be read according to available cache blocks and
* network rsize.
*/
-static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
+static void netfs_read_to_pagecache(struct netfs_io_request *rreq,
+ struct readahead_control *ractl)
{
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned long long start = rreq->start;
@@ -262,9 +264,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
if (ret < 0) {
subreq->error = ret;
/* Not queued - release both refs. */
- netfs_put_subrequest(subreq, false,
+ netfs_put_subrequest(subreq,
netfs_sreq_trace_put_cancel);
- netfs_put_subrequest(subreq, false,
+ netfs_put_subrequest(subreq,
netfs_sreq_trace_put_cancel);
break;
}
@@ -291,14 +293,14 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
break;
issue:
- slice = netfs_prepare_read_iterator(subreq);
+ slice = netfs_prepare_read_iterator(subreq, ractl);
if (slice < 0) {
ret = slice;
subreq->error = ret;
trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
/* Not queued - release both refs. */
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
break;
}
size -= slice;
@@ -312,7 +314,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
if (unlikely(size > 0)) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
/* Defer error return as we may need to wait for outstanding I/O. */
@@ -359,18 +361,15 @@ void netfs_readahead(struct readahead_control *ractl)
netfs_rreq_expand(rreq, ractl);
- rreq->ractl = ractl;
rreq->submitted = rreq->start;
if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
goto cleanup_free;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, ractl);
- netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
- return;
+ return netfs_put_request(rreq, netfs_rreq_trace_put_return);
cleanup_free:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
- return;
+ return netfs_put_request(rreq, netfs_rreq_trace_put_failed);
}
EXPORT_SYMBOL(netfs_readahead);
@@ -389,7 +388,6 @@ static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct fo
if (added < 0)
return added;
rreq->submitted = rreq->start + added;
- rreq->ractl = (struct readahead_control *)1UL;
return 0;
}
@@ -459,7 +457,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
rreq->submitted = rreq->start + flen;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, NULL);
if (sink)
folio_put(sink);
@@ -470,11 +468,11 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
folio_mark_uptodate(folio);
}
folio_unlock(folio);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
discard:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
alloc_error:
folio_unlock(folio);
return ret;
@@ -528,13 +526,13 @@ int netfs_read_folio(struct file *file, struct folio *folio)
if (ret < 0)
goto discard;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, NULL);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
discard:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
alloc_error:
folio_unlock(folio);
return ret;
@@ -685,11 +683,11 @@ retry:
if (ret < 0)
goto error_put;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, NULL);
ret = netfs_wait_for_read(rreq);
if (ret < 0)
goto error;
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
have_folio:
ret = folio_wait_private_2_killable(folio);
@@ -701,7 +699,7 @@ have_folio_no_wait:
return 0;
error_put:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(rreq, netfs_rreq_trace_put_failed);
error:
if (folio) {
folio_unlock(folio);
@@ -750,13 +748,13 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
if (ret < 0)
goto error_put;
- netfs_read_to_pagecache(rreq);
+ netfs_read_to_pagecache(rreq, NULL);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
error_put:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
error:
_leave(" = %d", ret);
return ret;
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index b4826360a411..72a3e6db2524 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -115,8 +115,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
size_t max_chunk = mapping_max_folio_size(mapping);
bool maybe_trouble = false;
- if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
- iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
+ if (unlikely(iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
) {
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
@@ -386,7 +385,7 @@ out:
wbc_detach_inode(&wbc);
if (ret2 == -EIOCBQUEUED)
return ret2;
- if (ret == 0)
+ if (ret == 0 && ret2 < 0)
ret = ret2;
}
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 5e3f0aeb51f3..a05e13472baf 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -85,7 +85,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
break;
}
}
@@ -103,19 +103,16 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
rreq->netfs_ops->issue_read(subreq);
if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
- netfs_wait_for_pause(rreq);
+ netfs_wait_for_paused_read(rreq);
if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
break;
- if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
- test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
- break;
cond_resched();
} while (size > 0);
if (unlikely(size > 0)) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
return ret;
@@ -144,7 +141,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
ret = netfs_dispatch_unbuffered_reads(rreq);
if (!rreq->submitted) {
- netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+ netfs_put_request(rreq, netfs_rreq_trace_put_no_submit);
inode_dio_end(rreq->inode);
ret = 0;
goto out;
@@ -188,7 +185,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
iocb->ki_pos, orig_count,
- NETFS_DIO_READ);
+ iocb->ki_flags & IOCB_DIRECT ?
+ NETFS_DIO_READ : NETFS_UNBUFFERED_READ);
if (IS_ERR(rreq))
return PTR_ERR(rreq);
@@ -236,7 +234,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
}
out:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
if (ret > 0)
orig_count -= ret;
return ret;
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 42ce53cc216e..fa9a5bf3c6d5 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -87,6 +87,8 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
}
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
+ if (async)
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
/* Copy the data into the bounce buffer and encrypt it. */
// TODO
@@ -105,19 +107,15 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
if (!async) {
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
- wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- ret = wreq->error;
- if (ret == 0) {
- ret = wreq->transferred;
+ ret = netfs_wait_for_write(wreq);
+ if (ret > 0)
iocb->ki_pos += ret;
- }
} else {
ret = -EIOCBQUEUED;
}
out:
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked);
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index b1722a82c03d..e4308457633c 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -192,8 +192,7 @@ EXPORT_SYMBOL(__fscache_clear_page_bits);
/*
* Deal with the completion of writing the data to the cache.
*/
-static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
- bool was_async)
+static void fscache_wreq_done(void *priv, ssize_t transferred_or_error)
{
struct fscache_write_request *wreq = priv;
@@ -202,8 +201,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
wreq->set_bits);
if (wreq->term_func)
- wreq->term_func(wreq->term_func_priv, transferred_or_error,
- was_async);
+ wreq->term_func(wreq->term_func_priv, transferred_or_error);
fscache_end_operation(&wreq->cache_resources);
kfree(wreq);
}
@@ -255,14 +253,14 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
return;
abandon_end:
- return fscache_wreq_done(wreq, ret, false);
+ return fscache_wreq_done(wreq, ret);
abandon_free:
kfree(wreq);
abandon:
if (using_pgpriv2)
fscache_clear_page_bits(mapping, start, len, cond);
if (term_func)
- term_func(term_func_priv, ret, false);
+ term_func(term_func_priv, ret);
}
EXPORT_SYMBOL(__fscache_write_to_cache);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 1c4f953c3d68..e2ee9183392b 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -23,7 +23,7 @@
/*
* buffered_read.c
*/
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
@@ -62,6 +62,14 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq,
enum netfs_folioq_trace trace);
void netfs_reset_iter(struct netfs_io_subrequest *subreq);
+void netfs_wake_collector(struct netfs_io_request *rreq);
+void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq);
+void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
+ struct netfs_io_stream *stream);
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
+ssize_t netfs_wait_for_write(struct netfs_io_request *rreq);
+void netfs_wait_for_paused_read(struct netfs_io_request *rreq);
+void netfs_wait_for_paused_write(struct netfs_io_request *rreq);
/*
* objects.c
@@ -71,9 +79,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
loff_t start, size_t len,
enum netfs_io_origin origin);
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
-void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async);
-void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
- enum netfs_rreq_ref_trace what);
+void netfs_clear_subrequests(struct netfs_io_request *rreq);
+void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq);
static inline void netfs_see_request(struct netfs_io_request *rreq,
@@ -92,11 +99,9 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq,
/*
* read_collect.c
*/
+bool netfs_read_collection(struct netfs_io_request *rreq);
void netfs_read_collection_worker(struct work_struct *work);
-void netfs_wake_read_collector(struct netfs_io_request *rreq);
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
-ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
-void netfs_wait_for_pause(struct netfs_io_request *rreq);
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
/*
* read_pgpriv2.c
@@ -176,8 +181,8 @@ static inline void netfs_stat_d(atomic_t *stat)
* write_collect.c
*/
int netfs_folio_written_back(struct folio *folio);
+bool netfs_write_collection(struct netfs_io_request *wreq);
void netfs_write_collection_worker(struct work_struct *work);
-void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async);
/*
* write_issue.c
@@ -198,8 +203,8 @@ struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache);
-int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
- struct folio *writethrough_cache);
+ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *writethrough_cache);
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
/*
@@ -255,6 +260,21 @@ static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
}
/*
+ * Clear and wake up a NETFS_RREQ_* flag bit on a request.
+ */
+static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq,
+ unsigned int rreq_flag,
+ enum netfs_rreq_trace trace)
+{
+ if (test_bit(rreq_flag, &rreq->flags)) {
+ trace_netfs_rreq(rreq, trace);
+ clear_bit_unlock(rreq_flag, &rreq->flags);
+ smp_mb__after_atomic(); /* Set flag before task state */
+ wake_up(&rreq->waitq);
+ }
+}
+
+/*
* fscache-cache.c
*/
#ifdef CONFIG_PROC_FS
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 70ecc8f5f210..3db401d269e7 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READ_GAPS] = "RG",
[NETFS_READ_SINGLE] = "R1",
[NETFS_READ_FOR_WRITE] = "RW",
+ [NETFS_UNBUFFERED_READ] = "UR",
[NETFS_DIO_READ] = "DR",
[NETFS_WRITEBACK] = "WB",
[NETFS_WRITEBACK_SINGLE] = "W1",
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 7099aa07737a..43b67a28a8fa 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -313,3 +313,222 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp)
return true;
}
EXPORT_SYMBOL(netfs_release_folio);
+
+/*
+ * Wake the collection work item.
+ */
+void netfs_wake_collector(struct netfs_io_request *rreq)
+{
+ if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
+ !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
+ queue_work(system_unbound_wq, &rreq->work);
+ } else {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
+ wake_up(&rreq->waitq);
+ }
+}
+
+/*
+ * Mark a subrequest as no longer being in progress and, if need be, wake the
+ * collector.
+ */
+void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *rreq = subreq->rreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr];
+
+ clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
+
+ /* If we are at the head of the queue, wake up the collector. */
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
+ test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
+ netfs_wake_collector(rreq);
+}
+
+/*
+ * Wait for all outstanding I/O in a stream to quiesce.
+ */
+void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
+ struct netfs_io_stream *stream)
+{
+ struct netfs_io_subrequest *subreq;
+ DEFINE_WAIT(myself);
+
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ continue;
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ for (;;) {
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ break;
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+}
+
+/*
+ * Perform collection in app thread if not offloaded to workqueue.
+ */
+static int netfs_collect_in_app(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ bool need_collect = false, inactive = true;
+
+ for (int i = 0; i < NR_IO_STREAMS; i++) {
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[i];
+
+ if (!stream->active)
+ continue;
+ inactive = false;
+ trace_netfs_collect_stream(rreq, stream);
+ subreq = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest,
+ rreq_link);
+ if (subreq &&
+ (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
+ need_collect = true;
+ break;
+ }
+ }
+
+ if (!need_collect && !inactive)
+ return 0; /* Sleep */
+
+ __set_current_state(TASK_RUNNING);
+ if (collector(rreq)) {
+ /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ return 1; /* Done */
+ }
+
+ if (inactive) {
+ WARN(true, "Failed to collect inactive req R=%08x\n",
+ rreq->debug_id);
+ cond_resched();
+ }
+ return 2; /* Again */
+}
+
+/*
+ * Wait for a request to complete, successfully or otherwise.
+ */
+static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ DEFINE_WAIT(myself);
+ ssize_t ret;
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ switch (netfs_collect_in_app(rreq, collector)) {
+ case 0:
+ break;
+ case 1:
+ goto all_collected;
+ case 2:
+ continue;
+ }
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+all_collected:
+ finish_wait(&rreq->waitq, &myself);
+
+ ret = rreq->error;
+ if (ret == 0) {
+ ret = rreq->transferred;
+ switch (rreq->origin) {
+ case NETFS_DIO_READ:
+ case NETFS_DIO_WRITE:
+ case NETFS_READ_SINGLE:
+ case NETFS_UNBUFFERED_READ:
+ case NETFS_UNBUFFERED_WRITE:
+ break;
+ default:
+ if (rreq->submitted < rreq->len) {
+ trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
+ ret = -EIO;
+ }
+ break;
+ }
+ }
+
+ return ret;
+}
+
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_request(rreq, netfs_read_collection);
+}
+
+ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_request(rreq, netfs_write_collection);
+}
+
+/*
+ * Wait for a paused operation to unpause or complete in some manner.
+ */
+static void netfs_wait_for_pause(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ DEFINE_WAIT(myself);
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ switch (netfs_collect_in_app(rreq, collector)) {
+ case 0:
+ break;
+ case 1:
+ goto all_collected;
+ case 2:
+ continue;
+ }
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+all_collected:
+ finish_wait(&rreq->waitq, &myself);
+}
+
+void netfs_wait_for_paused_read(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_pause(rreq, netfs_read_collection);
+}
+
+void netfs_wait_for_paused_write(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_pause(rreq, netfs_write_collection);
+}
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index dc6b41ef18b0..e8c99738b5bb 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -10,6 +10,8 @@
#include <linux/delay.h>
#include "internal.h"
+static void netfs_free_request(struct work_struct *work);
+
/*
* Allocate an I/O request and initialise it.
*/
@@ -34,6 +36,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
memset(rreq, 0, kmem_cache_size(cache));
+ INIT_WORK(&rreq->cleanup_work, netfs_free_request);
rreq->start = start;
rreq->len = len;
rreq->origin = origin;
@@ -49,13 +52,14 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
init_waitqueue_head(&rreq->waitq);
- refcount_set(&rreq->ref, 1);
+ refcount_set(&rreq->ref, 2);
if (origin == NETFS_READAHEAD ||
origin == NETFS_READPAGE ||
origin == NETFS_READ_GAPS ||
origin == NETFS_READ_SINGLE ||
origin == NETFS_READ_FOR_WRITE ||
+ origin == NETFS_UNBUFFERED_READ ||
origin == NETFS_DIO_READ) {
INIT_WORK(&rreq->work, netfs_read_collection_worker);
rreq->io_streams[0].avail = true;
@@ -64,8 +68,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- if (file && file->f_flags & O_NONBLOCK)
- __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
if (rreq->netfs_ops->init_request) {
ret = rreq->netfs_ops->init_request(rreq, file);
if (ret < 0) {
@@ -75,7 +77,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
atomic_inc(&ctx->io_count);
- trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
+ trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), netfs_rreq_trace_new);
netfs_proc_add_rreq(rreq);
netfs_stat(&netfs_n_rh_rreq);
return rreq;
@@ -89,7 +91,7 @@ void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace
trace_netfs_rreq_ref(rreq->debug_id, r + 1, what);
}
-void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
+void netfs_clear_subrequests(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream;
@@ -101,8 +103,7 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
subreq = list_first_entry(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, was_async,
- netfs_sreq_trace_put_clear);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_clear);
}
}
}
@@ -118,13 +119,19 @@ static void netfs_free_request_rcu(struct rcu_head *rcu)
static void netfs_free_request(struct work_struct *work)
{
struct netfs_io_request *rreq =
- container_of(work, struct netfs_io_request, work);
+ container_of(work, struct netfs_io_request, cleanup_work);
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned int i;
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
+
+ /* Cancel/flush the result collection worker. That does not carry a
+ * ref of its own, so we must wait for it somewhere.
+ */
+ cancel_work_sync(&rreq->work);
+
netfs_proc_del_rreq(rreq);
- netfs_clear_subrequests(rreq, false);
+ netfs_clear_subrequests(rreq);
if (rreq->netfs_ops->free_request)
rreq->netfs_ops->free_request(rreq);
if (rreq->cache_resources.ops)
@@ -145,8 +152,7 @@ static void netfs_free_request(struct work_struct *work)
call_rcu(&rreq->rcu, netfs_free_request_rcu);
}
-void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
- enum netfs_rreq_ref_trace what)
+void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what)
{
unsigned int debug_id;
bool dead;
@@ -156,15 +162,8 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
debug_id = rreq->debug_id;
dead = __refcount_dec_and_test(&rreq->ref, &r);
trace_netfs_rreq_ref(debug_id, r - 1, what);
- if (dead) {
- if (was_async) {
- rreq->work.func = netfs_free_request;
- if (!queue_work(system_unbound_wq, &rreq->work))
- WARN_ON(1);
- } else {
- netfs_free_request(&rreq->work);
- }
- }
+ if (dead)
+ WARN_ON(!queue_work(system_unbound_wq, &rreq->cleanup_work));
}
}
@@ -206,8 +205,7 @@ void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
what);
}
-static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
- bool was_async)
+static void netfs_free_subrequest(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
@@ -216,10 +214,10 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
rreq->netfs_ops->free_subrequest(subreq);
mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool);
netfs_stat_d(&netfs_n_rh_sreq);
- netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
+ netfs_put_request(rreq, netfs_rreq_trace_put_subreq);
}
-void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async,
+void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what)
{
unsigned int debug_index = subreq->debug_index;
@@ -230,5 +228,5 @@ void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async,
dead = __refcount_dec_and_test(&subreq->ref, &r);
trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what);
if (dead)
- netfs_free_subrequest(subreq, was_async);
+ netfs_free_subrequest(subreq);
}
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 23c75755ad4e..96ee18af28ef 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -83,14 +83,12 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
}
just_unlock:
- if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
- if (folio->index == rreq->no_unlock_folio &&
- test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
- _debug("no unlock");
- } else {
- trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
- folio_unlock(folio);
- }
+ if (folio->index == rreq->no_unlock_folio &&
+ test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
+ _debug("no unlock");
+ } else {
+ trace_netfs_folio(folio, netfs_folio_trace_read_unlock);
+ folio_unlock(folio);
}
folioq_clear(folioq, slot);
@@ -280,9 +278,13 @@ reassess:
stream->need_retry = true;
notes |= NEED_RETRY | MADE_PROGRESS;
break;
+ } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
+ notes |= MADE_PROGRESS;
} else {
if (!stream->failed)
- stream->transferred = stream->collected_to - rreq->start;
+ stream->transferred += transferred;
+ if (front->transferred < front->len)
+ set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
notes |= MADE_PROGRESS;
}
@@ -297,7 +299,7 @@ reassess:
struct netfs_io_subrequest, rreq_link);
stream->front = front;
spin_unlock(&rreq->lock);
- netfs_put_subrequest(remove, false,
+ netfs_put_subrequest(remove,
notes & ABANDON_SREQ ?
netfs_sreq_trace_put_abandon :
netfs_sreq_trace_put_done);
@@ -311,14 +313,8 @@ reassess:
if (notes & NEED_RETRY)
goto need_retry;
- if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_unpause);
- clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags);
- smp_mb__after_atomic(); /* Set PAUSE before task state */
- wake_up(&rreq->waitq);
- }
-
if (notes & MADE_PROGRESS) {
+ netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
//cond_resched();
goto reassess;
}
@@ -342,24 +338,10 @@ need_retry:
*/
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned int i;
- /* Collect unbuffered reads and direct reads, adding up the transfer
- * sizes until we find the first short or failed subrequest.
- */
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- rreq->transferred += subreq->transferred;
-
- if (subreq->transferred < subreq->len ||
- test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
- rreq->error = subreq->error;
- break;
- }
- }
-
- if (rreq->origin == NETFS_DIO_READ) {
+ if (rreq->origin == NETFS_UNBUFFERED_READ ||
+ rreq->origin == NETFS_DIO_READ) {
for (i = 0; i < rreq->direct_bv_count; i++) {
flush_dcache_page(rreq->direct_bv[i].bv_page);
// TODO: cifs marks pages in the destination buffer
@@ -377,7 +359,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
- if (rreq->origin == NETFS_DIO_READ)
+ if (rreq->origin == NETFS_UNBUFFERED_READ ||
+ rreq->origin == NETFS_DIO_READ)
inode_dio_end(rreq->inode);
}
@@ -410,7 +393,7 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
* Note that we're in normal kernel thread context at this point, possibly
* running on a workqueue.
*/
-static void netfs_read_collection(struct netfs_io_request *rreq)
+bool netfs_read_collection(struct netfs_io_request *rreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
@@ -420,11 +403,11 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
* queue is empty.
*/
if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
- return;
+ return false;
smp_rmb(); /* Read ALL_QUEUED before subreq lists. */
if (!list_empty(&stream->subrequests))
- return;
+ return false;
/* Okay, declare that all I/O is complete. */
rreq->transferred = stream->transferred;
@@ -433,6 +416,7 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
//netfs_rreq_is_still_valid(rreq);
switch (rreq->origin) {
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
case NETFS_READ_GAPS:
netfs_rreq_assess_dio(rreq);
@@ -445,14 +429,15 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
}
task_io_account_read(rreq->transferred);
- trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
- clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+ netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
+ /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
trace_netfs_rreq(rreq, netfs_rreq_trace_done);
- netfs_clear_subrequests(rreq, false);
+ netfs_clear_subrequests(rreq);
netfs_unlock_abandoned_read_pages(rreq);
if (unlikely(rreq->copy_to_cache))
netfs_pgpriv2_end_copy_to_cache(rreq);
+ return true;
}
void netfs_read_collection_worker(struct work_struct *work)
@@ -460,26 +445,12 @@ void netfs_read_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
- netfs_read_collection(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_work);
-}
-
-/*
- * Wake the collection work item.
- */
-void netfs_wake_read_collector(struct netfs_io_request *rreq)
-{
- if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
- !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
- if (!work_pending(&rreq->work)) {
- netfs_get_request(rreq, netfs_rreq_trace_get_work);
- if (!queue_work(system_unbound_wq, &rreq->work))
- netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq);
- }
- } else {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
- wake_up(&rreq->waitq);
+ if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_read_collection(rreq))
+ /* Drop the ref from the IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ else
+ netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
}
}
@@ -511,7 +482,7 @@ void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
list_is_first(&subreq->rreq_link, &stream->subrequests)
) {
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
@@ -535,7 +506,6 @@ EXPORT_SYMBOL(netfs_read_subreq_progress);
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
switch (subreq->source) {
case NETFS_READ_FROM_CACHE:
@@ -582,23 +552,15 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
}
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
-
- /* If we are at the head of the queue, wake up the collector. */
- if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
- test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
- netfs_wake_read_collector(rreq);
-
- netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
+ netfs_subreq_clear_in_progress(subreq);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
/*
* Handle termination of a read from the cache.
*/
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async)
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
{
struct netfs_io_subrequest *subreq = priv;
@@ -613,94 +575,3 @@ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool
}
netfs_read_subreq_terminated(subreq);
}
-
-/*
- * Wait for the read operation to complete, successfully or otherwise.
- */
-ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
- ssize_t ret;
-
- for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- subreq = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
- if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
- __set_current_state(TASK_RUNNING);
- netfs_read_collection(rreq);
- continue;
- }
-
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
- break;
-
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
-
- ret = rreq->error;
- if (ret == 0) {
- ret = rreq->transferred;
- switch (rreq->origin) {
- case NETFS_DIO_READ:
- case NETFS_READ_SINGLE:
- ret = rreq->transferred;
- break;
- default:
- if (rreq->submitted < rreq->len) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
- }
- break;
- }
- }
-
- return ret;
-}
-
-/*
- * Wait for a paused read operation to unpause or complete in some manner.
- */
-void netfs_wait_for_pause(struct netfs_io_request *rreq)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
-
- for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
- subreq = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
- if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
- __set_current_state(TASK_RUNNING);
- netfs_read_collection(rreq);
- continue;
- }
- }
-
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
- !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
- break;
-
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
-}
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index cf7727060215..5bbe906a551d 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -116,7 +116,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
return creq;
cancel_put:
- netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(creq, netfs_rreq_trace_put_return);
cancel:
rreq->copy_to_cache = ERR_PTR(-ENOBUFS);
clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
@@ -155,7 +155,7 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq)
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags);
- netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(creq, netfs_rreq_trace_put_return);
creq->copy_to_cache = NULL;
}
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 0f294b26e08c..b99e84a8170a 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -173,7 +173,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
}
@@ -257,35 +257,15 @@ abandon:
*/
void netfs_retry_reads(struct netfs_io_request *rreq)
{
- struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
netfs_stat(&netfs_n_rh_retry_read_req);
- set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
-
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
- continue;
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- for (;;) {
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
- break;
-
- trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
- }
+ set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
+ netfs_wait_for_in_progress_stream(rreq, stream);
clear_bit(NETFS_RREQ_RETRYING, &rreq->flags);
trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
index fea0ecdecc53..fa622a6cd56d 100644
--- a/fs/netfs/read_single.c
+++ b/fs/netfs/read_single.c
@@ -142,7 +142,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
return ret;
cancel:
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
return ret;
}
@@ -185,11 +185,11 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite
netfs_single_dispatch_read(rreq);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret;
cleanup_free:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(rreq, netfs_rreq_trace_put_failed);
return ret;
}
EXPORT_SYMBOL(netfs_read_single);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 3fca59e6475d..e2b102ffb768 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -280,7 +280,7 @@ reassess_streams:
struct netfs_io_subrequest, rreq_link);
stream->front = front;
spin_unlock(&wreq->lock);
- netfs_put_subrequest(remove, false,
+ netfs_put_subrequest(remove,
notes & SAW_FAILURE ?
netfs_sreq_trace_put_cancel :
netfs_sreq_trace_put_done);
@@ -321,18 +321,14 @@ reassess_streams:
if (notes & NEED_RETRY)
goto need_retry;
- if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
- clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
- smp_mb__after_atomic(); /* Set PAUSE before task state */
- wake_up(&wreq->waitq);
- }
- if (notes & NEED_REASSESS) {
+ if (notes & MADE_PROGRESS) {
+ netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
//cond_resched();
goto reassess_streams;
}
- if (notes & MADE_PROGRESS) {
+
+ if (notes & NEED_REASSESS) {
//cond_resched();
goto reassess_streams;
}
@@ -356,30 +352,21 @@ need_retry:
/*
* Perform the collection of subrequests, folios and encryption buffers.
*/
-void netfs_write_collection_worker(struct work_struct *work)
+bool netfs_write_collection(struct netfs_io_request *wreq)
{
- struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
struct netfs_inode *ictx = netfs_inode(wreq->inode);
size_t transferred;
int s;
_enter("R=%x", wreq->debug_id);
- netfs_see_request(wreq, netfs_rreq_trace_see_work);
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
-
netfs_collect_write_results(wreq);
/* We're done when the app thread has finished posting subreqs and all
* the queues in all the streams are empty.
*/
- if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
+ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags))
+ return false;
smp_rmb(); /* Read ALL_QUEUED before lists. */
transferred = LONG_MAX;
@@ -387,10 +374,8 @@ void netfs_write_collection_worker(struct work_struct *work)
struct netfs_io_stream *stream = &wreq->io_streams[s];
if (!stream->active)
continue;
- if (!list_empty(&stream->subrequests)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
+ if (!list_empty(&stream->subrequests))
+ return false;
if (stream->transferred < transferred)
transferred = stream->transferred;
}
@@ -428,8 +413,8 @@ void netfs_write_collection_worker(struct work_struct *work)
inode_dio_end(wreq->inode);
_debug("finished");
- trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
- clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
+ netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
+ /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
@@ -440,19 +425,21 @@ void netfs_write_collection_worker(struct work_struct *work)
wreq->iocb = VFS_PTR_POISON;
}
- netfs_clear_subrequests(wreq, false);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete);
+ netfs_clear_subrequests(wreq);
+ return true;
}
-/*
- * Wake the collection work item.
- */
-void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
+void netfs_write_collection_worker(struct work_struct *work)
{
- if (!work_pending(&wreq->work)) {
- netfs_get_request(wreq, netfs_rreq_trace_get_work);
- if (!queue_work(system_unbound_wq, &wreq->work))
- netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq);
+ struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
+
+ netfs_see_request(rreq, netfs_rreq_trace_see_work);
+ if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_write_collection(rreq))
+ /* Drop the ref from the IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ else
+ netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
}
}
@@ -460,7 +447,6 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
* netfs_write_subrequest_terminated - Note the termination of a write operation.
* @_op: The I/O request that has terminated.
* @transferred_or_error: The amount of data transferred or an error code.
- * @was_async: The termination was asynchronous
*
* This tells the library that a contributory write I/O operation has
* terminated, one way or another, and that it should collect the results.
@@ -470,21 +456,16 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
* negative error code. The library will look after reissuing I/O operations
* as appropriate and writing downloaded data to the cache.
*
- * If @was_async is true, the caller might be running in softirq or interrupt
- * context and we can't sleep.
- *
* When this is called, ownership of the subrequest is transferred back to the
* library, along with a ref.
*
* Note that %_op is a void* so that the function can be passed to
* kiocb::term_func without the need for a casting wrapper.
*/
-void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
- bool was_async)
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error)
{
struct netfs_io_subrequest *subreq = _op;
struct netfs_io_request *wreq = subreq->rreq;
- struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
@@ -495,8 +476,6 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
case NETFS_WRITE_TO_CACHE:
netfs_stat(&netfs_n_wh_write_done);
break;
- case NETFS_INVALID_WRITE:
- break;
default:
BUG();
}
@@ -536,15 +515,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
}
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- clear_and_wake_up_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
-
- /* If we are at the head of the queue, wake up the collector,
- * transferring a ref to it if we were the ones to do so.
- */
- if (list_is_first(&subreq->rreq_link, &stream->subrequests))
- netfs_wake_write_collector(wreq, was_async);
-
- netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+ netfs_subreq_clear_in_progress(subreq);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 77279fc5b5a7..50bee2c4130d 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -134,7 +134,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
return wreq;
nomem:
wreq->error = -ENOMEM;
- netfs_put_request(wreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(wreq, netfs_rreq_trace_put_failed);
return ERR_PTR(-ENOMEM);
}
@@ -233,7 +233,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
- return netfs_write_subrequest_terminated(subreq, subreq->error, false);
+ return netfs_write_subrequest_terminated(subreq, subreq->error);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
stream->issue_write(subreq);
@@ -542,7 +542,7 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq)
}
if (needs_poke)
- netfs_wake_write_collector(wreq, false);
+ netfs_wake_collector(wreq);
}
/*
@@ -576,6 +576,7 @@ int netfs_writepages(struct address_space *mapping,
goto couldnt_start;
}
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
trace_netfs_write(wreq, netfs_write_trace_writeback);
netfs_stat(&netfs_n_wh_writepages);
@@ -599,8 +600,9 @@ int netfs_writepages(struct address_space *mapping,
netfs_end_issue_write(wreq);
mutex_unlock(&ictx->wb_lock);
+ netfs_wake_collector(wreq);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
_leave(" = %d", error);
return error;
@@ -673,11 +675,11 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
/*
* End a write operation used when writing through the pagecache.
*/
-int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
- struct folio *writethrough_cache)
+ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *writethrough_cache)
{
struct netfs_inode *ictx = netfs_inode(wreq->inode);
- int ret;
+ ssize_t ret;
_enter("R=%x", wreq->debug_id);
@@ -688,13 +690,11 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
mutex_unlock(&ictx->wb_lock);
- if (wreq->iocb) {
+ if (wreq->iocb)
ret = -EIOCBQUEUED;
- } else {
- wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
- ret = wreq->error;
- }
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ else
+ ret = netfs_wait_for_write(wreq);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
}
@@ -722,10 +722,8 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
start += part;
len -= part;
rolling_buffer_advance(&wreq->buffer, part);
- if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
- wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags));
- }
+ if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags))
+ netfs_wait_for_paused_write(wreq);
if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
break;
}
@@ -885,7 +883,8 @@ int netfs_writeback_single(struct address_space *mapping,
goto couldnt_start;
}
- trace_netfs_write(wreq, netfs_write_trace_writeback);
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
+ trace_netfs_write(wreq, netfs_write_trace_writeback_single);
netfs_stat(&netfs_n_wh_writepages);
if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
@@ -914,8 +913,9 @@ stop:
set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
mutex_unlock(&ictx->wb_lock);
+ netfs_wake_collector(wreq);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
_leave(" = %d", ret);
return ret;
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index 545d33079a77..9d1d8a8bab72 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -39,9 +39,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
break;
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
- struct iov_iter source = subreq->io_iter;
+ struct iov_iter source;
- iov_iter_revert(&source, subreq->len - source.count);
+ netfs_reset_iter(subreq);
+ source = subreq->io_iter;
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_write(stream, subreq, &source);
}
@@ -131,7 +132,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
}
@@ -199,7 +200,6 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
*/
void netfs_retry_writes(struct netfs_io_request *wreq)
{
- struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream;
int s;
@@ -208,16 +208,13 @@ void netfs_retry_writes(struct netfs_io_request *wreq)
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
+ set_bit(NETFS_RREQ_RETRYING, &wreq->flags);
for (s = 0; s < NR_IO_STREAMS; s++) {
stream = &wreq->io_streams[s];
- if (!stream->active)
- continue;
-
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- }
+ if (stream->active)
+ netfs_wait_for_in_progress_stream(wreq, stream);
}
+ clear_bit(NETFS_RREQ_RETRYING, &wreq->flags);
// TODO: Enc: Fetch changed partial pages
// TODO: Enc: Reencrypt content if needed.
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index e278a1ad1ca3..8b0785178731 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
sreq = netfs->sreq;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+ sreq->rreq->origin != NETFS_UNBUFFERED_READ &&
sreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 0d8f7fb15c2e..dd0c8e560ef6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -2102,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree,
ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
if (ret < 0) {
- if (unlikely(ret == -ENOENT))
+ if (unlikely(ret == -ENOENT)) {
nilfs_crit(btree->b_inode->i_sb,
"writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
btree->b_inode->i_ino,
(unsigned long long)key, level);
+ ret = -EINVAL;
+ }
goto out;
}
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 893ab36824cc..2d8dc6b35b54 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
dat = nilfs_bmap_get_dat(bmap);
key = nilfs_bmap_data_get_key(bmap, bh);
ptr = nilfs_direct_get_ptr(bmap, key);
+ if (ptr == NILFS_BMAP_INVALID_PTR)
+ return -EINVAL;
+
if (!buffer_nilfs_volatile(bh)) {
oldreq.pr_entry_nr = ptr;
newreq.pr_entry_nr = ptr;
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 2f850a18d6e7..946b0d3534a5 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -422,8 +422,6 @@ static int nilfs_mdt_write_folio(struct folio *folio,
if (wbc->sync_mode == WB_SYNC_ALL)
err = nilfs_construct_segment(sb);
- else if (wbc->for_reclaim)
- nilfs_flush_segment(sb, inode->i_ino);
return err;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 83970d97840b..61a4141f8d6b 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2221,22 +2221,6 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
spin_unlock(&sci->sc_state_lock);
}
-/**
- * nilfs_flush_segment - trigger a segment construction for resource control
- * @sb: super block
- * @ino: inode number of the file to be flushed out.
- */
-void nilfs_flush_segment(struct super_block *sb, ino_t ino)
-{
- struct the_nilfs *nilfs = sb->s_fs_info;
- struct nilfs_sc_info *sci = nilfs->ns_writer;
-
- if (!sci || nilfs_doing_construction())
- return;
- nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
- /* assign bit 0 to data files */
-}
-
struct nilfs_segctor_wait_request {
wait_queue_entry_t wq;
__u32 seq;
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index f723f47ddc4e..4b39ed43ae72 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -226,7 +226,6 @@ extern void nilfs_relax_pressure_in_lock(struct super_block *);
extern int nilfs_construct_segment(struct super_block *);
extern int nilfs_construct_dsync_segment(struct super_block *, struct inode *,
loff_t, loff_t);
-extern void nilfs_flush_segment(struct super_block *, ino_t);
extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
void **);
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 34ed242e1063..1e99a35691cd 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -913,7 +913,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
struct ntfs_inode *ni = ntfs_i(inode);
u64 valid = ni->i_valid;
struct ntfs_sb_info *sbi = ni->mi.sbi;
- struct page *page, **pages = NULL;
+ struct page **pages = NULL;
+ struct folio *folio;
size_t written = 0;
u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits;
u32 frame_size = 1u << frame_bits;
@@ -923,7 +924,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
u64 frame_vbo;
pgoff_t index;
bool frame_uptodate;
- struct folio *folio;
if (frame_size < PAGE_SIZE) {
/*
@@ -977,8 +977,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
pages_per_frame);
if (err) {
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_unlock(folio);
folio_put(folio);
}
@@ -989,10 +988,9 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ip = off >> PAGE_SHIFT;
off = offset_in_page(valid);
for (; ip < pages_per_frame; ip++, off = 0) {
- page = pages[ip];
- folio = page_folio(page);
- zero_user_segment(page, off, PAGE_SIZE);
- flush_dcache_page(page);
+ folio = page_folio(pages[ip]);
+ folio_zero_segment(folio, off, PAGE_SIZE);
+ flush_dcache_folio(folio);
folio_mark_uptodate(folio);
}
@@ -1001,8 +999,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ni_unlock(ni);
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_mark_uptodate(folio);
folio_unlock(folio);
folio_put(folio);
@@ -1046,8 +1043,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
if (err) {
for (ip = 0; ip < pages_per_frame;
ip++) {
- page = pages[ip];
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
folio_unlock(folio);
folio_put(folio);
}
@@ -1065,10 +1061,10 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
for (;;) {
size_t cp, tail = PAGE_SIZE - off;
- page = pages[ip];
- cp = copy_page_from_iter_atomic(page, off,
+ folio = page_folio(pages[ip]);
+ cp = copy_folio_from_iter_atomic(folio, off,
min(tail, bytes), from);
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
copied += cp;
bytes -= cp;
@@ -1088,9 +1084,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
ni_unlock(ni);
for (ip = 0; ip < pages_per_frame; ip++) {
- page = pages[ip];
- ClearPageDirty(page);
- folio = page_folio(page);
+ folio = page_folio(pages[ip]);
+ folio_clear_dirty(folio);
folio_mark_uptodate(folio);
folio_unlock(folio);
folio_put(folio);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index fce9beb214f0..43e652a2adaf 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1483,7 +1483,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
sc_put(sc);
}
-/* socket shutdown does a del_timer_sync against this as it tears down.
+/* socket shutdown does a timer_delete_sync against this as it tears down.
* we can't start this timer until we've got to the point in sc buildup
* where shutdown is going to be involved */
static void o2net_idle_timer(struct timer_list *t)
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 1ad7106741f8..3ad7baf67658 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -505,5 +505,5 @@ static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
ocfs2_filecheck_handle_entry(ent, entry);
exit:
- return (!ret ? count : ret);
+ return ret ?: count;
}
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index e272429da3db..de7f12858729 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -674,7 +674,7 @@ out_put:
break;
}
out:
- kfree(rec);
+ ocfs2_free_quota_recovery(rec);
return status;
}
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index ddd761cf44c8..a28c127b9934 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -691,8 +691,7 @@ static void __exit ocfs2_stack_glue_exit(void)
memset(&locking_max_version, 0,
sizeof(struct ocfs2_protocol_version));
ocfs2_sysfs_exit();
- if (ocfs2_table_header)
- unregister_sysctl_table(ocfs2_table_header);
+ unregister_sysctl_table(ocfs2_table_header);
}
MODULE_AUTHOR("Oracle");
diff --git a/fs/pipe.c b/fs/pipe.c
index da45edd68c41..45077c37bad1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,6 +26,7 @@
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>
#include <linux/sysctl.h>
+#include <linux/sort.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
@@ -76,8 +77,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
* -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
*/
-#define cmp_int(l, r) ((l > r) - (l < r))
-
#ifdef CONFIG_PROVE_LOCKING
static int pipe_lock_cmp_fn(const struct lockdep_map *a,
const struct lockdep_map *b)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fe33a5843fbd..c667702dc69b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -827,7 +827,13 @@ static const struct file_operations proc_single_file_operations = {
.release = single_release,
};
-
+/*
+ * proc_mem_open() can return errno, NULL or mm_struct*.
+ *
+ * - Returns NULL if the task has no mm (PF_KTHREAD or PF_EXITING)
+ * - Returns mm_struct* on success
+ * - Returns error code on failure
+ */
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
{
struct task_struct *task = get_proc_task(inode);
@@ -854,8 +860,8 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
{
struct mm_struct *mm = proc_mem_open(inode, mode);
- if (IS_ERR(mm))
- return PTR_ERR(mm);
+ if (IS_ERR_OR_NULL(mm))
+ return mm ? PTR_ERR(mm) : -ESRCH;
file->private_data = mm;
return 0;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 23fc771100ae..999af26c7298 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -22,6 +22,12 @@
#define KPMMASK (KPMSIZE - 1)
#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
+enum kpage_operation {
+ KPAGE_FLAGS,
+ KPAGE_COUNT,
+ KPAGE_CGROUP,
+};
+
static inline unsigned long get_max_dump_pfn(void)
{
#ifdef CONFIG_SPARSEMEM
@@ -37,19 +43,17 @@ static inline unsigned long get_max_dump_pfn(void)
#endif
}
-/* /proc/kpagecount - an array exposing page mapcounts
- *
- * Each entry is a u64 representing the corresponding
- * physical page mapcount.
- */
-static ssize_t kpagecount_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t kpage_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos,
+ enum kpage_operation op)
{
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
+ struct page *page;
unsigned long src = *ppos;
unsigned long pfn;
ssize_t ret = 0;
+ u64 info;
pfn = src / KPMSIZE;
if (src & KPMMASK || count & KPMMASK)
@@ -59,24 +63,34 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
while (count > 0) {
- struct page *page;
- u64 mapcount = 0;
-
/*
* TODO: ZONE_DEVICE support requires to identify
* memmaps that were actually initialized.
*/
page = pfn_to_online_page(pfn);
- if (page) {
- struct folio *folio = page_folio(page);
- if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
- mapcount = folio_precise_page_mapcount(folio, page);
- else
- mapcount = folio_average_page_mapcount(folio);
- }
-
- if (put_user(mapcount, out)) {
+ if (page) {
+ switch (op) {
+ case KPAGE_FLAGS:
+ info = stable_page_flags(page);
+ break;
+ case KPAGE_COUNT:
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
+ info = folio_precise_page_mapcount(page_folio(page), page);
+ else
+ info = folio_average_page_mapcount(page_folio(page));
+ break;
+ case KPAGE_CGROUP:
+ info = page_cgroup_ino(page);
+ break;
+ default:
+ info = 0;
+ break;
+ }
+ } else
+ info = 0;
+
+ if (put_user(info, out)) {
ret = -EFAULT;
break;
}
@@ -94,17 +108,23 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
return ret;
}
+/* /proc/kpagecount - an array exposing page mapcounts
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page mapcount.
+ */
+static ssize_t kpagecount_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return kpage_read(file, buf, count, ppos, KPAGE_COUNT);
+}
+
static const struct proc_ops kpagecount_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_lseek = mem_lseek,
.proc_read = kpagecount_read,
};
-/* /proc/kpageflags - an array exposing page flags
- *
- * Each entry is a u64 representing the corresponding
- * physical page flags.
- */
static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
@@ -225,47 +245,17 @@ u64 stable_page_flags(const struct page *page)
#endif
return u;
-};
+}
+/* /proc/kpageflags - an array exposing page flags
+ *
+ * Each entry is a u64 representing the corresponding
+ * physical page flags.
+ */
static ssize_t kpageflags_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
- const unsigned long max_dump_pfn = get_max_dump_pfn();
- u64 __user *out = (u64 __user *)buf;
- unsigned long src = *ppos;
- unsigned long pfn;
- ssize_t ret = 0;
-
- pfn = src / KPMSIZE;
- if (src & KPMMASK || count & KPMMASK)
- return -EINVAL;
- if (src >= max_dump_pfn * KPMSIZE)
- return 0;
- count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
-
- while (count > 0) {
- /*
- * TODO: ZONE_DEVICE support requires to identify
- * memmaps that were actually initialized.
- */
- struct page *page = pfn_to_online_page(pfn);
-
- if (put_user(stable_page_flags(page), out)) {
- ret = -EFAULT;
- break;
- }
-
- pfn++;
- out++;
- count -= KPMSIZE;
-
- cond_resched();
- }
-
- *ppos += (char __user *)out - buf;
- if (!ret)
- ret = (char __user *)out - buf;
- return ret;
+ return kpage_read(file, buf, count, ppos, KPAGE_FLAGS);
}
static const struct proc_ops kpageflags_proc_ops = {
@@ -276,53 +266,10 @@ static const struct proc_ops kpageflags_proc_ops = {
#ifdef CONFIG_MEMCG
static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ size_t count, loff_t *ppos)
{
- const unsigned long max_dump_pfn = get_max_dump_pfn();
- u64 __user *out = (u64 __user *)buf;
- struct page *ppage;
- unsigned long src = *ppos;
- unsigned long pfn;
- ssize_t ret = 0;
- u64 ino;
-
- pfn = src / KPMSIZE;
- if (src & KPMMASK || count & KPMMASK)
- return -EINVAL;
- if (src >= max_dump_pfn * KPMSIZE)
- return 0;
- count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);
-
- while (count > 0) {
- /*
- * TODO: ZONE_DEVICE support requires to identify
- * memmaps that were actually initialized.
- */
- ppage = pfn_to_online_page(pfn);
-
- if (ppage)
- ino = page_cgroup_ino(ppage);
- else
- ino = 0;
-
- if (put_user(ino, out)) {
- ret = -EFAULT;
- break;
- }
-
- pfn++;
- out++;
- count -= KPMSIZE;
-
- cond_resched();
- }
-
- *ppos += (char __user *)out - buf;
- if (!ret)
- ret = (char __user *)out - buf;
- return ret;
+ return kpage_read(file, buf, count, ppos, KPAGE_CGROUP);
}
-
static const struct proc_ops kpagecgroup_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_lseek = mem_lseek,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 994cde10e3f4..27972c0749e7 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -212,8 +212,8 @@ static int proc_maps_open(struct inode *inode, struct file *file,
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(priv->mm)) {
- int err = PTR_ERR(priv->mm);
+ if (IS_ERR_OR_NULL(priv->mm)) {
+ int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
seq_release_private(inode, file);
return err;
@@ -1325,8 +1325,8 @@ static int smaps_rollup_open(struct inode *inode, struct file *file)
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(priv->mm)) {
- ret = PTR_ERR(priv->mm);
+ if (IS_ERR_OR_NULL(priv->mm)) {
+ ret = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
single_release(inode, file);
goto out_free;
@@ -2069,8 +2069,8 @@ static int pagemap_open(struct inode *inode, struct file *file)
struct mm_struct *mm;
mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(mm))
- return PTR_ERR(mm);
+ if (IS_ERR_OR_NULL(mm))
+ return mm ? PTR_ERR(mm) : -ESRCH;
file->private_data = mm;
return 0;
}
@@ -2087,7 +2087,8 @@ static int pagemap_release(struct inode *inode, struct file *file)
#define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
PAGE_IS_FILE | PAGE_IS_PRESENT | \
PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
- PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY)
+ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY | \
+ PAGE_IS_GUARD)
#define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
struct pagemap_scan_private {
@@ -2128,12 +2129,14 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
- if (p->masks_of_interest & PAGE_IS_FILE) {
- swp = pte_to_swp_entry(pte);
- if (is_pfn_swap_entry(swp) &&
- !folio_test_anon(pfn_swap_entry_folio(swp)))
- categories |= PAGE_IS_FILE;
- }
+ swp = pte_to_swp_entry(pte);
+ if (is_guard_swp_entry(swp))
+ categories |= PAGE_IS_GUARD;
+ else if ((p->masks_of_interest & PAGE_IS_FILE) &&
+ is_pfn_swap_entry(swp) &&
+ !folio_test_anon(pfn_swap_entry_folio(swp)))
+ categories |= PAGE_IS_FILE;
+
if (pte_swp_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index bce674533000..59bfd61d653a 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -260,8 +260,8 @@ static int maps_open(struct inode *inode, struct file *file,
priv->inode = inode;
priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
- if (IS_ERR(priv->mm)) {
- int err = PTR_ERR(priv->mm);
+ if (IS_ERR_OR_NULL(priv->mm)) {
+ int err = priv->mm ? PTR_ERR(priv->mm) : -ESRCH;
seq_release_private(inode, file);
return err;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index ecf774a8f1ca..66093fa78aed 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -151,8 +151,7 @@ extern bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 eof,
bool from_readdir);
extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
unsigned int bytes_written);
-void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result,
- bool was_async);
+void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result);
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int);
extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode,
int flags,
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index f55457b4b82e..477792c07d45 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1725,7 +1725,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
server->credits, server->in_flight,
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
- cifs_write_subrequest_terminated(wdata, result, true);
+ cifs_write_subrequest_terminated(wdata, result);
release_mid(mid);
trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index, 0,
server->credits, server->in_flight,
@@ -1813,7 +1813,7 @@ async_writev_out:
out:
if (rc) {
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, false);
+ cifs_write_subrequest_terminated(wdata, rc);
}
}
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 950aa4f912f5..d2df10b8e6fd 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -130,7 +130,7 @@ fail:
else
trace_netfs_sreq(subreq, netfs_sreq_trace_fail);
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, false);
+ cifs_write_subrequest_terminated(wdata, rc);
goto out;
}
@@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
goto failed;
}
- if (subreq->rreq->origin != NETFS_DIO_READ)
+ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
@@ -2423,8 +2424,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
return rc;
}
-void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result,
- bool was_async)
+void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result)
{
struct netfs_io_request *wreq = wdata->rreq;
struct netfs_inode *ictx = netfs_inode(wreq->inode);
@@ -2441,7 +2441,7 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t
netfs_resize_file(ictx, wrend, true);
}
- netfs_write_subrequest_terminated(&wdata->subreq, result, was_async);
+ netfs_write_subrequest_terminated(&wdata->subreq, result);
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 4e28632b5fd6..399185ca7cac 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4888,7 +4888,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
- cifs_write_subrequest_terminated(wdata, result ?: written, true);
+ cifs_write_subrequest_terminated(wdata, result ?: written);
release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
server->credits, server->in_flight,
@@ -5061,7 +5061,7 @@ out:
-(int)wdata->credits.value,
cifs_trace_rw_credits_write_response_clear);
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, true);
+ cifs_write_subrequest_terminated(wdata, rc);
}
}
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index b1091e70434a..a9602aae21ef 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -149,6 +149,27 @@ config SQUASHFS_XATTR
If unsure, say N.
+config SQUASHFS_COMP_CACHE_FULL
+ bool "Enable full caching of compressed blocks"
+ depends on SQUASHFS
+ default n
+ help
+ This option enables caching of all compressed blocks, Without caching,
+ repeated reads of the same files trigger excessive disk I/O, significantly
+ reducinng performance in workloads like fio-based benchmarks.
+
+ For example, fio tests (iodepth=1, numjobs=1, ioengine=psync) show:
+ With caching: IOPS=2223, BW=278MiB/s (291MB/s)
+ Without caching: IOPS=815, BW=102MiB/s (107MB/s)
+
+ Enabling this option restores performance to pre-regression levels by
+ caching all compressed blocks in the page cache, reducing disk I/O for
+ repeated reads. However, this increases memory usage, which may be a
+ concern in memory-constrained environments.
+
+ Enable this option if your workload involves frequent repeated reads and
+ memory usage is not a limiting factor. If unsure, say N.
+
config SQUASHFS_ZLIB
bool "Include support for ZLIB compressed file systems"
depends on SQUASHFS
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2dc730800f44..3061043e915c 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -88,6 +88,10 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
struct bio_vec *bv;
int idx = 0;
int err = 0;
+#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
+ struct page **cache_pages = kmalloc_array(page_count,
+ sizeof(void *), GFP_KERNEL | __GFP_ZERO);
+#endif
bio_for_each_segment_all(bv, fullbio, iter_all) {
struct page *page = bv->bv_page;
@@ -110,6 +114,11 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
head_to_cache = page;
else if (idx == page_count - 1 && index + length != read_end)
tail_to_cache = page;
+#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
+ /* Cache all pages in the BIO for repeated reads */
+ else if (cache_pages)
+ cache_pages[idx] = page;
+#endif
if (!bio || idx != end_idx) {
struct bio *new = bio_alloc_clone(bdev, fullbio,
@@ -163,6 +172,25 @@ static int squashfs_bio_read_cached(struct bio *fullbio,
}
}
+#ifdef CONFIG_SQUASHFS_COMP_CACHE_FULL
+ if (!cache_pages)
+ goto out;
+
+ for (idx = 0; idx < page_count; idx++) {
+ if (!cache_pages[idx])
+ continue;
+ int ret = add_to_page_cache_lru(cache_pages[idx], cache_mapping,
+ (read_start >> PAGE_SHIFT) + idx,
+ GFP_NOIO);
+
+ if (!ret) {
+ SetPageUptodate(cache_pages[idx]);
+ unlock_page(cache_pages[idx]);
+ }
+ }
+ kfree(cache_pages);
+out:
+#endif
return 0;
}
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 67c55fe32ce8..992ea0e37257 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -202,6 +202,11 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
+ if (!msblk->devblksize) {
+ errorf(fc, "squashfs: unable to set blocksize\n");
+ return -EINVAL;
+ }
+
msblk->devblksize_log2 = ffz(~msblk->devblksize);
mutex_init(&msblk->meta_index_mutex);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 26a04a783489..63151feb9c3f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -436,6 +436,25 @@ allocate_blocks:
return 0;
}
+static bool
+xfs_ioend_needs_wq_completion(
+ struct iomap_ioend *ioend)
+{
+ /* Changing inode size requires a transaction. */
+ if (xfs_ioend_is_append(ioend))
+ return true;
+
+ /* Extent manipulation requires a transaction. */
+ if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))
+ return true;
+
+ /* Page cache invalidation cannot be done in irq context. */
+ if (ioend->io_flags & IOMAP_IOEND_DONTCACHE)
+ return true;
+
+ return false;
+}
+
static int
xfs_submit_ioend(
struct iomap_writepage_ctx *wpc,
@@ -460,8 +479,7 @@ xfs_submit_ioend(
memalloc_nofs_restore(nofs_flag);
/* send ioends that might require a transaction to the completion wq */
- if (xfs_ioend_is_append(ioend) ||
- (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)))
+ if (xfs_ioend_needs_wq_completion(ioend))
ioend->io_bio.bi_end_io = xfs_end_bio;
if (status)
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index d613a4094db6..9c00fc5baa30 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -290,8 +290,6 @@ xfs_zone_gc_query_cb(
return 0;
}
-#define cmp_int(l, r) ((l > r) - (l < r))
-
static int
xfs_zone_gc_rmap_rec_cmp(
const void *a,