summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-10-03 12:48:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-10-03 12:48:18 -0700
commit6238729bfce13f94b701766996a5d116d2df8bff (patch)
tree90180a22e9b5193af77ceff90bc3b1c301483bcb
parentcf06d791f840be97f726ecaaea872a876ff62436 (diff)
parentcb403594701cd36f7f3f868258655d56f9afaf8e (diff)
Merge tag 'fuse-update-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi: - Extend copy_file_range interface to be fully 64bit capable (Miklos) - Add selftest for fusectl (Chen Linxuan) - Move fuse docs into a separate directory (Bagas Sanjaya) - Allow fuse to enter freezable state in some cases (Sergey Senozhatsky) - Clean up writeback accounting after removing tmp page copies (Joanne) - Optimize virtiofs request handling (Li RongQing) - Add synchronous FUSE_INIT support (Miklos) - Allow server to request prune of unused inodes (Miklos) - Fix deadlock with AIO/sync release (Darrick) - Add some prep patches for block/iomap support (Darrick) - Misc fixes and cleanups * tag 'fuse-update-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (26 commits) fuse: move CREATE_TRACE_POINTS to a separate file fuse: move the backing file idr and code into a new source file fuse: enable FUSE_SYNCFS for all fuseblk servers fuse: capture the unique id of fuse commands being sent fuse: fix livelock in synchronous file put from fuseblk workers mm: fix lockdep issues in writeback handling fuse: add prune notification fuse: remove redundant calls to fuse_copy_finish() in fuse_notify() fuse: fix possibly missing fuse_copy_finish() call in fuse_notify() fuse: remove FUSE_NOTIFY_CODE_MAX from <uapi/linux/fuse.h> fuse: remove fuse_readpages_end() null mapping check fuse: fix references to fuse.rst -> fuse/fuse.rst fuse: allow synchronous FUSE_INIT fuse: zero initialize inode private data fuse: remove unused 'inode' parameter in fuse_passthrough_open virtio_fs: fix the hash table using in virtio_fs_enqueue_req() mm: remove BDI_CAP_WRITEBACK_ACCT fuse: use default writeback accounting virtio_fs: Remove redundant spinlock in virtio_fs_request_complete() fuse: remove unneeded offset assignment when filling write pages ...
-rw-r--r--Documentation/filesystems/fuse/fuse-io-uring.rst (renamed from Documentation/filesystems/fuse-io-uring.rst)0
-rw-r--r--Documentation/filesystems/fuse/fuse-io.rst (renamed from Documentation/filesystems/fuse-io.rst)2
-rw-r--r--Documentation/filesystems/fuse/fuse-passthrough.rst (renamed from Documentation/filesystems/fuse-passthrough.rst)0
-rw-r--r--Documentation/filesystems/fuse/fuse.rst (renamed from Documentation/filesystems/fuse.rst)20
-rw-r--r--Documentation/filesystems/fuse/index.rst14
-rw-r--r--Documentation/filesystems/index.rst5
-rw-r--r--Documentation/filesystems/sysfs.rst2
-rw-r--r--Documentation/translations/zh_CN/filesystems/sysfs.txt2
-rw-r--r--Documentation/translations/zh_TW/filesystems/sysfs.txt2
-rw-r--r--MAINTAINERS3
-rw-r--r--fs/fuse/Kconfig2
-rw-r--r--fs/fuse/Makefile5
-rw-r--r--fs/fuse/backing.c179
-rw-r--r--fs/fuse/cuse.c3
-rw-r--r--fs/fuse/dev.c227
-rw-r--r--fs/fuse/dev_uring.c8
-rw-r--r--fs/fuse/file.c86
-rw-r--r--fs/fuse/fuse_dev_i.h13
-rw-r--r--fs/fuse/fuse_i.h70
-rw-r--r--fs/fuse/inode.c76
-rw-r--r--fs/fuse/iomode.c3
-rw-r--r--fs/fuse/passthrough.c167
-rw-r--r--fs/fuse/trace.c13
-rw-r--r--fs/fuse/virtio_fs.c12
-rw-r--r--include/linux/backing-dev.h14
-rw-r--r--include/linux/wait.h12
-rw-r--r--include/uapi/linux/fuse.h22
-rw-r--r--mm/backing-dev.c2
-rw-r--r--mm/page-writeback.c45
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/filesystems/fuse/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/fuse/Makefile21
-rw-r--r--tools/testing/selftests/filesystems/fuse/fuse_mnt.c146
-rw-r--r--tools/testing/selftests/filesystems/fuse/fusectl_test.c140
34 files changed, 922 insertions, 398 deletions
diff --git a/Documentation/filesystems/fuse-io-uring.rst b/Documentation/filesystems/fuse/fuse-io-uring.rst
index d73dd0dbd238..d73dd0dbd238 100644
--- a/Documentation/filesystems/fuse-io-uring.rst
+++ b/Documentation/filesystems/fuse/fuse-io-uring.rst
diff --git a/Documentation/filesystems/fuse-io.rst b/Documentation/filesystems/fuse/fuse-io.rst
index 6464de4266ad..d736ac4cb483 100644
--- a/Documentation/filesystems/fuse-io.rst
+++ b/Documentation/filesystems/fuse/fuse-io.rst
@@ -1,7 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
==============
-Fuse I/O Modes
+FUSE I/O Modes
==============
Fuse supports the following I/O modes:
diff --git a/Documentation/filesystems/fuse-passthrough.rst b/Documentation/filesystems/fuse/fuse-passthrough.rst
index 2b0e7c2da54a..2b0e7c2da54a 100644
--- a/Documentation/filesystems/fuse-passthrough.rst
+++ b/Documentation/filesystems/fuse/fuse-passthrough.rst
diff --git a/Documentation/filesystems/fuse.rst b/Documentation/filesystems/fuse/fuse.rst
index 1e31e87aee68..0fbd5a03fdc9 100644
--- a/Documentation/filesystems/fuse.rst
+++ b/Documentation/filesystems/fuse/fuse.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
-====
-FUSE
-====
+=============
+FUSE Overview
+=============
Definitions
===========
@@ -129,6 +129,20 @@ For each connection the following files exist within this directory:
connection. This means that all waiting requests will be aborted an
error returned for all aborted and new requests.
+ max_background
+ The maximum number of background requests that can be outstanding
+ at a time. When the number of background requests reaches this limit,
+ further requests will be blocked until some are completed, potentially
+ causing I/O operations to stall.
+
+ congestion_threshold
+ The threshold of background requests at which the kernel considers
+ the filesystem to be congested. When the number of background requests
+ exceeds this value, the kernel will skip asynchronous readahead
+ operations, reducing read-ahead optimizations but preserving essential
+ I/O, as well as suspending non-synchronous writeback operations
+ (WB_SYNC_NONE), delaying page cache flushing to the filesystem.
+
Only the owner of the mount may read or write these files.
Interrupting filesystem operations
diff --git a/Documentation/filesystems/fuse/index.rst b/Documentation/filesystems/fuse/index.rst
new file mode 100644
index 000000000000..393a845214da
--- /dev/null
+++ b/Documentation/filesystems/fuse/index.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
+FUSE (Filesystem in Userspace) Technical Documentation
+======================================================
+
+.. toctree::
+ :maxdepth: 2
+ :numbered:
+
+ fuse
+ fuse-io
+ fuse-io-uring
+ fuse-passthrough
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 622187a96bdc..af516e528ded 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -95,10 +95,7 @@ Documentation for filesystem implementations.
hfs
hfsplus
hpfs
- fuse
- fuse-io
- fuse-io-uring
- fuse-passthrough
+ fuse/index
inotify
isofs
nilfs2
diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst
index c32993bc83c7..be966f27f284 100644
--- a/Documentation/filesystems/sysfs.rst
+++ b/Documentation/filesystems/sysfs.rst
@@ -321,7 +321,7 @@ span multiple bus types).
fs/ contains a directory for some filesystems. Currently each
filesystem wanting to export attributes must create its own hierarchy
-below fs/ (see ./fuse.rst for an example).
+below fs/ (see fuse/fuse.rst for an example).
module/ contains parameter values and state information for all
loaded system modules, for both builtin and loadable modules.
diff --git a/Documentation/translations/zh_CN/filesystems/sysfs.txt b/Documentation/translations/zh_CN/filesystems/sysfs.txt
index 547062759e60..b17c9f638628 100644
--- a/Documentation/translations/zh_CN/filesystems/sysfs.txt
+++ b/Documentation/translations/zh_CN/filesystems/sysfs.txt
@@ -282,7 +282,7 @@ drivers/ 包含了每个已为特定总线上的设备而挂载的驱动程序
假定驱动没有跨越多个总线类型)。
fs/ 包含了一个为文件系统设立的目录。现在每个想要导出属性的文件系统必须
-在 fs/ 下创建自己的层次结构(参见Documentation/filesystems/fuse.rst)。
+在 fs/ 下创建自己的层次结构(参见Documentation/filesystems/fuse/fuse.rst)。
dev/ 包含两个子目录: char/ 和 block/。在这两个子目录中,有以
<major>:<minor> 格式命名的符号链接。这些符号链接指向 sysfs 目录
diff --git a/Documentation/translations/zh_TW/filesystems/sysfs.txt b/Documentation/translations/zh_TW/filesystems/sysfs.txt
index 978462d5fe14..d1cee02ef1de 100644
--- a/Documentation/translations/zh_TW/filesystems/sysfs.txt
+++ b/Documentation/translations/zh_TW/filesystems/sysfs.txt
@@ -285,7 +285,7 @@ drivers/ 包含了每個已爲特定總線上的設備而掛載的驅動程序
假定驅動沒有跨越多個總線類型)。
fs/ 包含了一個爲文件系統設立的目錄。現在每個想要導出屬性的文件系統必須
-在 fs/ 下創建自己的層次結構(參見Documentation/filesystems/fuse.rst)。
+在 fs/ 下創建自己的層次結構(參見Documentation/filesystems/fuse/fuse.rst)。
dev/ 包含兩個子目錄: char/ 和 block/。在這兩個子目錄中,有以
<major>:<minor> 格式命名的符號鏈接。這些符號鏈接指向 sysfs 目錄
diff --git a/MAINTAINERS b/MAINTAINERS
index 4abe33914884..fa3e51d84399 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10208,9 +10208,10 @@ L: linux-fsdevel@vger.kernel.org
S: Maintained
W: https://github.com/libfuse/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
-F: Documentation/filesystems/fuse*
+F: Documentation/filesystems/fuse/*
F: fs/fuse/
F: include/uapi/linux/fuse.h
+F: tools/testing/selftests/filesystems/fuse/
FUTEX SUBSYSTEM
M: Thomas Gleixner <tglx@linutronix.de>
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index a774166264de..3a4ae632c94a 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -13,7 +13,7 @@ config FUSE_FS
although chances are your distribution already has that library
installed if you've installed the "fuse" package itself.
- See <file:Documentation/filesystems/fuse.rst> for more information.
+ See <file:Documentation/filesystems/fuse/fuse.rst> for more information.
See <file:Documentation/Changes> for needed library/utility version.
If you want to develop a userspace FS, or if you want to use
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 3f0f312a31c1..22ad9538dfc4 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -10,10 +10,11 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
-fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
+fuse-y := trace.o # put trace.o first so we see ftrace errors sooner
+fuse-y += dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
-fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
+fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o backing.o
fuse-$(CONFIG_SYSCTL) += sysctl.o
fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c
new file mode 100644
index 000000000000..4afda419dd14
--- /dev/null
+++ b/fs/fuse/backing.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FUSE passthrough to backing file.
+ *
+ * Copyright (c) 2023 CTERA Networks.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/file.h>
+
+struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
+{
+ if (fb && refcount_inc_not_zero(&fb->count))
+ return fb;
+ return NULL;
+}
+
+static void fuse_backing_free(struct fuse_backing *fb)
+{
+ pr_debug("%s: fb=0x%p\n", __func__, fb);
+
+ if (fb->file)
+ fput(fb->file);
+ put_cred(fb->cred);
+ kfree_rcu(fb, rcu);
+}
+
+void fuse_backing_put(struct fuse_backing *fb)
+{
+ if (fb && refcount_dec_and_test(&fb->count))
+ fuse_backing_free(fb);
+}
+
+void fuse_backing_files_init(struct fuse_conn *fc)
+{
+ idr_init(&fc->backing_files_map);
+}
+
+static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb)
+{
+ int id;
+
+ idr_preload(GFP_KERNEL);
+ spin_lock(&fc->lock);
+ /* FIXME: xarray might be space inefficient */
+ id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC);
+ spin_unlock(&fc->lock);
+ idr_preload_end();
+
+ WARN_ON_ONCE(id == 0);
+ return id;
+}
+
+static struct fuse_backing *fuse_backing_id_remove(struct fuse_conn *fc,
+ int id)
+{
+ struct fuse_backing *fb;
+
+ spin_lock(&fc->lock);
+ fb = idr_remove(&fc->backing_files_map, id);
+ spin_unlock(&fc->lock);
+
+ return fb;
+}
+
+static int fuse_backing_id_free(int id, void *p, void *data)
+{
+ struct fuse_backing *fb = p;
+
+ WARN_ON_ONCE(refcount_read(&fb->count) != 1);
+ fuse_backing_free(fb);
+ return 0;
+}
+
+void fuse_backing_files_free(struct fuse_conn *fc)
+{
+ idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
+ idr_destroy(&fc->backing_files_map);
+}
+
+int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
+{
+ struct file *file;
+ struct super_block *backing_sb;
+ struct fuse_backing *fb = NULL;
+ int res;
+
+ pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags);
+
+ /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
+ res = -EPERM;
+ if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
+ goto out;
+
+ res = -EINVAL;
+ if (map->flags || map->padding)
+ goto out;
+
+ file = fget_raw(map->fd);
+ res = -EBADF;
+ if (!file)
+ goto out;
+
+ /* read/write/splice/mmap passthrough only relevant for regular files */
+ res = d_is_dir(file->f_path.dentry) ? -EISDIR : -EINVAL;
+ if (!d_is_reg(file->f_path.dentry))
+ goto out_fput;
+
+ backing_sb = file_inode(file)->i_sb;
+ res = -ELOOP;
+ if (backing_sb->s_stack_depth >= fc->max_stack_depth)
+ goto out_fput;
+
+ fb = kmalloc(sizeof(struct fuse_backing), GFP_KERNEL);
+ res = -ENOMEM;
+ if (!fb)
+ goto out_fput;
+
+ fb->file = file;
+ fb->cred = prepare_creds();
+ refcount_set(&fb->count, 1);
+
+ res = fuse_backing_id_alloc(fc, fb);
+ if (res < 0) {
+ fuse_backing_free(fb);
+ fb = NULL;
+ }
+
+out:
+ pr_debug("%s: fb=0x%p, ret=%i\n", __func__, fb, res);
+
+ return res;
+
+out_fput:
+ fput(file);
+ goto out;
+}
+
+int fuse_backing_close(struct fuse_conn *fc, int backing_id)
+{
+ struct fuse_backing *fb = NULL;
+ int err;
+
+ pr_debug("%s: backing_id=%d\n", __func__, backing_id);
+
+ /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
+ err = -EPERM;
+ if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
+ goto out;
+
+ err = -EINVAL;
+ if (backing_id <= 0)
+ goto out;
+
+ err = -ENOENT;
+ fb = fuse_backing_id_remove(fc, backing_id);
+ if (!fb)
+ goto out;
+
+ fuse_backing_put(fb);
+ err = 0;
+out:
+ pr_debug("%s: fb=0x%p, err=%i\n", __func__, fb, err);
+
+ return err;
+}
+
+struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id)
+{
+ struct fuse_backing *fb;
+
+ rcu_read_lock();
+ fb = idr_find(&fc->backing_files_map, backing_id);
+ fb = fuse_backing_get(fb);
+ rcu_read_unlock();
+
+ return fb;
+}
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index b39844d75a80..28c96961e85d 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -52,6 +52,7 @@
#include <linux/user_namespace.h>
#include "fuse_i.h"
+#include "fuse_dev_i.h"
#define CUSE_CONNTBL_LEN 64
@@ -547,7 +548,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
*/
static int cuse_channel_release(struct inode *inode, struct file *file)
{
- struct fuse_dev *fud = file->private_data;
+ struct fuse_dev *fud = __fuse_get_dev(file);
struct cuse_conn *cc = fc_to_cc(fud->fc);
/* remove from the conntbl, no more access from this point on */
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ad8645c0f9fe..132f38619d70 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -25,7 +25,6 @@
#include <linux/sched.h>
#include <linux/seq_file.h>
-#define CREATE_TRACE_POINTS
#include "fuse_trace.h"
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
@@ -207,8 +206,9 @@ static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap,
if (fuse_block_alloc(fc, for_background)) {
err = -EINTR;
- if (wait_event_killable_exclusive(fc->blocked_waitq,
- !fuse_block_alloc(fc, for_background)))
+ if (wait_event_state_exclusive(fc->blocked_waitq,
+ !fuse_block_alloc(fc, for_background),
+ (TASK_KILLABLE | TASK_FREEZABLE)))
goto out;
}
/* Matches smp_wmb() in fuse_set_initialized() */
@@ -322,6 +322,7 @@ unsigned int fuse_req_hash(u64 unique)
{
return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
}
+EXPORT_SYMBOL_GPL(fuse_req_hash);
/*
* A new request is available, wake fiq->waitq
@@ -369,12 +370,32 @@ void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
}
}
+static inline void fuse_request_assign_unique_locked(struct fuse_iqueue *fiq,
+ struct fuse_req *req)
+{
+ if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+ req->in.h.unique = fuse_get_unique_locked(fiq);
+
+ /* tracepoint captures in.h.unique and in.h.len */
+ trace_fuse_request_send(req);
+}
+
+inline void fuse_request_assign_unique(struct fuse_iqueue *fiq,
+ struct fuse_req *req)
+{
+ if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
+ req->in.h.unique = fuse_get_unique(fiq);
+
+ /* tracepoint captures in.h.unique and in.h.len */
+ trace_fuse_request_send(req);
+}
+EXPORT_SYMBOL_GPL(fuse_request_assign_unique);
+
static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
spin_lock(&fiq->lock);
if (fiq->connected) {
- if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
- req->in.h.unique = fuse_get_unique_locked(fiq);
+ fuse_request_assign_unique_locked(fiq, req);
list_add_tail(&req->list, &fiq->pending);
fuse_dev_wake_and_unlock(fiq);
} else {
@@ -397,7 +418,6 @@ static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
req->in.h.len = sizeof(struct fuse_in_header) +
fuse_len_args(req->args->in_numargs,
(struct fuse_arg *) req->args->in_args);
- trace_fuse_request_send(req);
fiq->ops->send_req(fiq, req);
}
@@ -687,10 +707,10 @@ static bool fuse_request_queue_background_uring(struct fuse_conn *fc,
{
struct fuse_iqueue *fiq = &fc->iq;
- req->in.h.unique = fuse_get_unique(fiq);
req->in.h.len = sizeof(struct fuse_in_header) +
fuse_len_args(req->args->in_numargs,
(struct fuse_arg *) req->args->in_args);
+ fuse_request_assign_unique(fiq, req);
return fuse_uring_queue_bq_req(req);
}
@@ -1528,14 +1548,34 @@ static int fuse_dev_open(struct inode *inode, struct file *file)
return 0;
}
+struct fuse_dev *fuse_get_dev(struct file *file)
+{
+ struct fuse_dev *fud = __fuse_get_dev(file);
+ int err;
+
+ if (likely(fud))
+ return fud;
+
+ err = wait_event_interruptible(fuse_dev_waitq,
+ READ_ONCE(file->private_data) != FUSE_DEV_SYNC_INIT);
+ if (err)
+ return ERR_PTR(err);
+
+ fud = __fuse_get_dev(file);
+ if (!fud)
+ return ERR_PTR(-EPERM);
+
+ return fud;
+}
+
static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
{
struct fuse_copy_state cs;
struct file *file = iocb->ki_filp;
struct fuse_dev *fud = fuse_get_dev(file);
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
if (!user_backed_iter(to))
return -EINVAL;
@@ -1555,8 +1595,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct fuse_copy_state cs;
struct fuse_dev *fud = fuse_get_dev(in);
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
GFP_KERNEL);
@@ -1600,35 +1640,31 @@ static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_poll_wakeup_out outarg;
- int err = -EINVAL;
+ int err;
if (size != sizeof(outarg))
- goto err;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto err;
+ return err;
fuse_copy_finish(cs);
return fuse_notify_poll_wakeup(fc, &outarg);
-
-err:
- fuse_copy_finish(cs);
- return err;
}
static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_inval_inode_out outarg;
- int err = -EINVAL;
+ int err;
if (size != sizeof(outarg))
- goto err;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto err;
+ return err;
fuse_copy_finish(cs);
down_read(&fc->killsb);
@@ -1636,10 +1672,6 @@ static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
outarg.off, outarg.len);
up_read(&fc->killsb);
return err;
-
-err:
- fuse_copy_finish(cs);
- return err;
}
static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
@@ -1647,29 +1679,26 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
{
struct fuse_notify_inval_entry_out outarg;
int err;
- char *buf = NULL;
+ char *buf;
struct qstr name;
- err = -EINVAL;
if (size < sizeof(outarg))
- goto err;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto err;
+ return err;
- err = -ENAMETOOLONG;
if (outarg.namelen > fc->name_max)
- goto err;
+ return -ENAMETOOLONG;
err = -EINVAL;
if (size != sizeof(outarg) + outarg.namelen + 1)
- goto err;
+ return -EINVAL;
- err = -ENOMEM;
buf = kzalloc(outarg.namelen + 1, GFP_KERNEL);
if (!buf)
- goto err;
+ return -ENOMEM;
name.name = buf;
name.len = outarg.namelen;
@@ -1682,12 +1711,8 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb);
err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags);
up_read(&fc->killsb);
- kfree(buf);
- return err;
-
err:
kfree(buf);
- fuse_copy_finish(cs);
return err;
}
@@ -1696,29 +1721,25 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
{
struct fuse_notify_delete_out outarg;
int err;
- char *buf = NULL;
+ char *buf;
struct qstr name;
- err = -EINVAL;
if (size < sizeof(outarg))
- goto err;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto err;
+ return err;
- err = -ENAMETOOLONG;
if (outarg.namelen > fc->name_max)
- goto err;
+ return -ENAMETOOLONG;
- err = -EINVAL;
if (size != sizeof(outarg) + outarg.namelen + 1)
- goto err;
+ return -EINVAL;
- err = -ENOMEM;
buf = kzalloc(outarg.namelen + 1, GFP_KERNEL);
if (!buf)
- goto err;
+ return -ENOMEM;
name.name = buf;
name.len = outarg.namelen;
@@ -1731,12 +1752,8 @@ static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
down_read(&fc->killsb);
err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0);
up_read(&fc->killsb);
- kfree(buf);
- return err;
-
err:
kfree(buf);
- fuse_copy_finish(cs);
return err;
}
@@ -1754,17 +1771,15 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
loff_t file_size;
loff_t end;
- err = -EINVAL;
if (size < sizeof(outarg))
- goto out_finish;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto out_finish;
+ return err;
- err = -EINVAL;
if (size - sizeof(outarg) != outarg.size)
- goto out_finish;
+ return -EINVAL;
nodeid = outarg.nodeid;
@@ -1824,8 +1839,6 @@ out_iput:
iput(inode);
out_up_killsb:
up_read(&fc->killsb);
-out_finish:
- fuse_copy_finish(cs);
return err;
}
@@ -1940,13 +1953,12 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
u64 nodeid;
int err;
- err = -EINVAL;
if (size != sizeof(outarg))
- goto copy_finish;
+ return -EINVAL;
err = fuse_copy_one(cs, &outarg, sizeof(outarg));
if (err)
- goto copy_finish;
+ return err;
fuse_copy_finish(cs);
@@ -1962,10 +1974,6 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
up_read(&fc->killsb);
return err;
-
-copy_finish:
- fuse_copy_finish(cs);
- return err;
}
/*
@@ -2044,6 +2052,42 @@ static int fuse_notify_inc_epoch(struct fuse_conn *fc)
return 0;
}
+static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size,
+ struct fuse_copy_state *cs)
+{
+ struct fuse_notify_prune_out outarg;
+ const unsigned int batch = 512;
+ u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL);
+ unsigned int num, i;
+ int err;
+
+ if (!nodeids)
+ return -ENOMEM;
+
+ if (size < sizeof(outarg))
+ return -EINVAL;
+
+ err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+ if (err)
+ return err;
+
+ if (size - sizeof(outarg) != outarg.count * sizeof(u64))
+ return -EINVAL;
+
+ for (; outarg.count; outarg.count -= num) {
+ num = min(batch, outarg.count);
+ err = fuse_copy_one(cs, nodeids, num * sizeof(u64));
+ if (err)
+ return err;
+
+ scoped_guard(rwsem_read, &fc->killsb) {
+ for (i = 0; i < num; i++)
+ fuse_try_prune_one_inode(fc, nodeids[i]);
+ }
+ }
+ return 0;
+}
+
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
@@ -2075,8 +2119,10 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
case FUSE_NOTIFY_INC_EPOCH:
return fuse_notify_inc_epoch(fc);
+ case FUSE_NOTIFY_PRUNE:
+ return fuse_notify_prune(fc, size, cs);
+
default:
- fuse_copy_finish(cs);
return -EINVAL;
}
}
@@ -2156,7 +2202,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
*/
if (!oh.unique) {
err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
- goto out;
+ goto copy_finish;
}
err = -EINVAL;
@@ -2229,7 +2275,7 @@ copy_finish:
static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
{
struct fuse_copy_state cs;
- struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
+ struct fuse_dev *fud = __fuse_get_dev(iocb->ki_filp);
if (!fud)
return -EPERM;
@@ -2251,11 +2297,10 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
unsigned idx;
struct pipe_buffer *bufs;
struct fuse_copy_state cs;
- struct fuse_dev *fud;
+ struct fuse_dev *fud = __fuse_get_dev(out);
size_t rem;
ssize_t ret;
- fud = fuse_get_dev(out);
if (!fud)
return -EPERM;
@@ -2341,7 +2386,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
struct fuse_iqueue *fiq;
struct fuse_dev *fud = fuse_get_dev(file);
- if (!fud)
+ if (IS_ERR(fud))
return EPOLLERR;
fiq = &fud->fc->iq;
@@ -2394,7 +2439,7 @@ static void end_polls(struct fuse_conn *fc)
* The same effect is usually achievable through killing the filesystem daemon
* and all users of the filesystem. The exception is the combination of an
* asynchronous request and the tricky deadlock (see
- * Documentation/filesystems/fuse.rst).
+ * Documentation/filesystems/fuse/fuse.rst).
*
* Aborting requests under I/O goes as follows: 1: Separate out unlocked
* requests, they should be finished off immediately. Locked requests will be
@@ -2488,7 +2533,7 @@ void fuse_wait_aborted(struct fuse_conn *fc)
int fuse_dev_release(struct inode *inode, struct file *file)
{
- struct fuse_dev *fud = fuse_get_dev(file);
+ struct fuse_dev *fud = __fuse_get_dev(file);
if (fud) {
struct fuse_conn *fc = fud->fc;
@@ -2519,8 +2564,8 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
{
struct fuse_dev *fud = fuse_get_dev(file);
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
/* No locking - fasync_helper does its own locking */
return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
@@ -2530,7 +2575,7 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
{
struct fuse_dev *fud;
- if (new->private_data)
+ if (__fuse_get_dev(new))
return -EINVAL;
fud = fuse_dev_alloc_install(fc);
@@ -2561,7 +2606,7 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
* uses the same ioctl handler.
*/
if (fd_file(f)->f_op == file->f_op)
- fud = fuse_get_dev(fd_file(f));
+ fud = __fuse_get_dev(fd_file(f));
res = -EINVAL;
if (fud) {
@@ -2579,8 +2624,8 @@ static long fuse_dev_ioctl_backing_open(struct file *file,
struct fuse_dev *fud = fuse_get_dev(file);
struct fuse_backing_map map;
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
return -EOPNOTSUPP;
@@ -2596,8 +2641,8 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
struct fuse_dev *fud = fuse_get_dev(file);
int backing_id;
- if (!fud)
- return -EPERM;
+ if (IS_ERR(fud))
+ return PTR_ERR(fud);
if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
return -EOPNOTSUPP;
@@ -2608,6 +2653,19 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
return fuse_backing_close(fud->fc, backing_id);
}
+static long fuse_dev_ioctl_sync_init(struct file *file)
+{
+ int err = -EINVAL;
+
+ mutex_lock(&fuse_mutex);
+ if (!__fuse_get_dev(file)) {
+ WRITE_ONCE(file->private_data, FUSE_DEV_SYNC_INIT);
+ err = 0;
+ }
+ mutex_unlock(&fuse_mutex);
+ return err;
+}
+
static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -2623,6 +2681,9 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
case FUSE_DEV_IOC_BACKING_CLOSE:
return fuse_dev_ioctl_backing_close(file, argp);
+ case FUSE_DEV_IOC_SYNC_INIT:
+ return fuse_dev_ioctl_sync_init(file);
+
default:
return -ENOTTY;
}
@@ -2631,7 +2692,7 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
#ifdef CONFIG_PROC_FS
static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file)
{
- struct fuse_dev *fud = fuse_get_dev(file);
+ struct fuse_dev *fud = __fuse_get_dev(file);
if (!fud)
return;
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index a30c44234a4e..f6b12aebb8bb 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -7,6 +7,7 @@
#include "fuse_i.h"
#include "dev_uring_i.h"
#include "fuse_dev_i.h"
+#include "fuse_trace.h"
#include <linux/fs.h>
#include <linux/io_uring/cmd.h>
@@ -1139,9 +1140,9 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
return -EINVAL;
fud = fuse_get_dev(cmd->file);
- if (!fud) {
+ if (IS_ERR(fud)) {
pr_info_ratelimited("No fuse device found\n");
- return -ENOTCONN;
+ return PTR_ERR(fud);
}
fc = fud->fc;
@@ -1268,8 +1269,7 @@ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
if (!queue)
goto err;
- if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
- req->in.h.unique = fuse_get_unique(fiq);
+ fuse_request_assign_unique(fiq, req);
spin_lock(&queue->lock);
err = -ENOTCONN;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4adcf09d4b01..f1ef77a0be05 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -356,8 +356,14 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
* Make the release synchronous if this is a fuseblk mount,
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
+ *
+ * Always use the asynchronous file put because the current thread
+ * might be the fuse server. This can happen if a process starts some
+ * aio and closes the fd before the aio completes. Since aio takes its
+ * own ref to the file, the IO completion has to drop the ref, which is
+ * how the fuse server can end up closing its clients' files.
*/
- fuse_file_put(ff, ff->fm->fc->destroy);
+ fuse_file_put(ff, false);
}
void fuse_release_common(struct file *file, bool isdir)
@@ -865,22 +871,20 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_args_pages *ap = &ia->ap;
size_t count = ia->read.in.size;
size_t num_read = args->out_args[0].size;
- struct address_space *mapping = NULL;
-
- for (i = 0; mapping == NULL && i < ap->num_folios; i++)
- mapping = ap->folios[i]->mapping;
+ struct address_space *mapping;
+ struct inode *inode;
- if (mapping) {
- struct inode *inode = mapping->host;
+ WARN_ON_ONCE(!ap->num_folios);
+ mapping = ap->folios[0]->mapping;
+ inode = mapping->host;
- /*
- * Short read means EOF. If file size is larger, truncate it
- */
- if (!err && num_read < count)
- fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
+ /*
+ * Short read means EOF. If file size is larger, truncate it
+ */
+ if (!err && num_read < count)
+ fuse_short_read(inode, ia->read.attr_ver, num_read, ap);
- fuse_invalidate_atime(inode);
- }
+ fuse_invalidate_atime(inode);
for (i = 0; i < ap->num_folios; i++) {
folio_end_read(ap->folios[i], !err);
@@ -1175,7 +1179,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
num = min(iov_iter_count(ii), fc->max_write);
ap->args.in_pages = true;
- ap->descs[0].offset = offset;
while (num && ap->num_folios < max_folios) {
size_t tmp;
@@ -1823,19 +1826,15 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
struct fuse_args_pages *ap = &wpa->ia.ap;
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
- struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
- for (i = 0; i < ap->num_folios; i++) {
+ for (i = 0; i < ap->num_folios; i++)
/*
* Benchmarks showed that ending writeback within the
* scope of the fi->lock alleviates xarray lock
* contention and noticeably improves performance.
*/
iomap_finish_folio_write(inode, ap->folios[i], 1);
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- wb_writeout_inc(&bdi->wb);
- }
wake_up(&fi->page_waitq);
}
@@ -2010,14 +2009,11 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
uint32_t folio_index, loff_t offset, unsigned len)
{
- struct inode *inode = folio->mapping->host;
struct fuse_args_pages *ap = &wpa->ia.ap;
ap->folios[folio_index] = folio;
ap->descs[folio_index].offset = offset;
ap->descs[folio_index].length = len;
-
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
}
static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
@@ -2960,10 +2956,12 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
.nodeid_out = ff_out->nodeid,
.fh_out = ff_out->fh,
.off_out = pos_out,
- .len = min_t(size_t, len, UINT_MAX & PAGE_MASK),
+ .len = len,
.flags = flags
};
struct fuse_write_out outarg;
+ struct fuse_copy_file_range_out outarg_64;
+ u64 bytes_copied;
ssize_t err;
/* mark unstable when write-back is not used, and file_out gets
* extended */
@@ -3013,33 +3011,51 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
if (is_unstable)
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
- args.opcode = FUSE_COPY_FILE_RANGE;
+ args.opcode = FUSE_COPY_FILE_RANGE_64;
args.nodeid = ff_in->nodeid;
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
args.in_args[0].value = &inarg;
args.out_numargs = 1;
- args.out_args[0].size = sizeof(outarg);
- args.out_args[0].value = &outarg;
+ args.out_args[0].size = sizeof(outarg_64);
+ args.out_args[0].value = &outarg_64;
+ if (fc->no_copy_file_range_64) {
+fallback:
+ /* Fall back to old op that can't handle large copy length */
+ args.opcode = FUSE_COPY_FILE_RANGE;
+ args.out_args[0].size = sizeof(outarg);
+ args.out_args[0].value = &outarg;
+ inarg.len = len = min_t(size_t, len, UINT_MAX & PAGE_MASK);
+ }
err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
- fc->no_copy_file_range = 1;
- err = -EOPNOTSUPP;
+ if (fc->no_copy_file_range_64) {
+ fc->no_copy_file_range = 1;
+ err = -EOPNOTSUPP;
+ } else {
+ fc->no_copy_file_range_64 = 1;
+ goto fallback;
+ }
}
- if (!err && outarg.size > len)
- err = -EIO;
-
if (err)
goto out;
+ bytes_copied = fc->no_copy_file_range_64 ?
+ outarg.size : outarg_64.bytes_copied;
+
+ if (bytes_copied > len) {
+ err = -EIO;
+ goto out;
+ }
+
truncate_inode_pages_range(inode_out->i_mapping,
ALIGN_DOWN(pos_out, PAGE_SIZE),
- ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
+ ALIGN(pos_out + bytes_copied, PAGE_SIZE) - 1);
file_update_time(file_out);
- fuse_write_update_attr(inode_out, pos_out + outarg.size, outarg.size);
+ fuse_write_update_attr(inode_out, pos_out + bytes_copied, bytes_copied);
- err = outarg.size;
+ err = bytes_copied;
out:
if (is_unstable)
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
index 5a9bd771a319..6e8373f97040 100644
--- a/fs/fuse/fuse_dev_i.h
+++ b/fs/fuse/fuse_dev_i.h
@@ -12,6 +12,8 @@
#define FUSE_INT_REQ_BIT (1ULL << 0)
#define FUSE_REQ_ID_STEP (1ULL << 1)
+extern struct wait_queue_head fuse_dev_waitq;
+
struct fuse_arg;
struct fuse_args;
struct fuse_pqueue;
@@ -37,15 +39,22 @@ struct fuse_copy_state {
} ring;
};
-static inline struct fuse_dev *fuse_get_dev(struct file *file)
+#define FUSE_DEV_SYNC_INIT ((struct fuse_dev *) 1)
+#define FUSE_DEV_PTR_MASK (~1UL)
+
+static inline struct fuse_dev *__fuse_get_dev(struct file *file)
{
/*
* Lockless access is OK, because file->private data is set
* once during mount and is valid until the file is released.
*/
- return READ_ONCE(file->private_data);
+ struct fuse_dev *fud = READ_ONCE(file->private_data);
+
+ return (typeof(fud)) ((unsigned long) fud & FUSE_DEV_PTR_MASK);
}
+struct fuse_dev *fuse_get_dev(struct file *file);
+
unsigned int fuse_req_hash(u64 unique);
struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cc428d04be3e..c2f2a48156d6 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -856,6 +856,9 @@ struct fuse_conn {
/** Does the filesystem support copy_file_range? */
unsigned no_copy_file_range:1;
+ /** Does the filesystem support copy_file_range_64? */
+ unsigned no_copy_file_range_64:1;
+
/* Send DESTROY request */
unsigned int destroy:1;
@@ -901,6 +904,9 @@ struct fuse_conn {
/* Is link not implemented by fs? */
unsigned int no_link:1;
+ /* Is synchronous FUSE_INIT allowed? */
+ unsigned int sync_init:1;
+
/* Use io_uring for communication */
unsigned int io_uring;
@@ -1255,6 +1261,11 @@ int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
gfp_t gfp_flags);
/**
+ * Assign a unique id to a fuse request
+ */
+void fuse_request_assign_unique(struct fuse_iqueue *fiq, struct fuse_req *req);
+
+/**
* End a finished request
*/
void fuse_request_end(struct fuse_req *req);
@@ -1315,7 +1326,7 @@ struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
struct fuse_dev *fuse_dev_alloc(void);
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
void fuse_dev_free(struct fuse_dev *fud);
-void fuse_send_init(struct fuse_mount *fm);
+int fuse_send_init(struct fuse_mount *fm);
/**
* Fill in superblock and initialize fuse connection
@@ -1407,6 +1418,12 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
u64 child_nodeid, struct qstr *name, u32 flags);
+/*
+ * Try to prune this inode. If neither the inode itself nor dentries associated
+ * with this inode have any external reference, then the inode can be freed.
+ */
+void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid);
+
int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
bool isdir);
@@ -1512,29 +1529,11 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
void fuse_file_release(struct inode *inode, struct fuse_file *ff,
unsigned int open_flags, fl_owner_t id, bool isdir);
-/* passthrough.c */
-static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi)
-{
-#ifdef CONFIG_FUSE_PASSTHROUGH
- return READ_ONCE(fi->fb);
-#else
- return NULL;
-#endif
-}
-
-static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi,
- struct fuse_backing *fb)
-{
-#ifdef CONFIG_FUSE_PASSTHROUGH
- return xchg(&fi->fb, fb);
-#else
- return NULL;
-#endif
-}
-
+/* backing.c */
#ifdef CONFIG_FUSE_PASSTHROUGH
struct fuse_backing *fuse_backing_get(struct fuse_backing *fb);
void fuse_backing_put(struct fuse_backing *fb);
+struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, int backing_id);
#else
static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
@@ -1545,6 +1544,11 @@ static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
static inline void fuse_backing_put(struct fuse_backing *fb)
{
}
+static inline struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc,
+ int backing_id)
+{
+ return NULL;
+}
#endif
void fuse_backing_files_init(struct fuse_conn *fc);
@@ -1552,9 +1556,27 @@ void fuse_backing_files_free(struct fuse_conn *fc);
int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map);
int fuse_backing_close(struct fuse_conn *fc, int backing_id);
-struct fuse_backing *fuse_passthrough_open(struct file *file,
- struct inode *inode,
- int backing_id);
+/* passthrough.c */
+static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi)
+{
+#ifdef CONFIG_FUSE_PASSTHROUGH
+ return READ_ONCE(fi->fb);
+#else
+ return NULL;
+#endif
+}
+
+static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi,
+ struct fuse_backing *fb)
+{
+#ifdef CONFIG_FUSE_PASSTHROUGH
+ return xchg(&fi->fb, fb);
+#else
+ return NULL;
+#endif
+}
+
+struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id);
void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb);
static inline struct file *fuse_file_passthrough(struct fuse_file *ff)
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7485a41af892..d1babf56f254 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -7,6 +7,7 @@
*/
#include "fuse_i.h"
+#include "fuse_dev_i.h"
#include "dev_uring_i.h"
#include <linux/dax.h>
@@ -34,6 +35,7 @@ MODULE_LICENSE("GPL");
static struct kmem_cache *fuse_inode_cachep;
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
+DECLARE_WAIT_QUEUE_HEAD(fuse_dev_waitq);
static int set_global_limit(const char *val, const struct kernel_param *kp);
@@ -101,14 +103,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
if (!fi)
return NULL;
- fi->i_time = 0;
+ /* Initialize private data (i.e. everything except fi->inode) */
+ BUILD_BUG_ON(offsetof(struct fuse_inode, inode) != 0);
+ memset((void *) fi + sizeof(fi->inode), 0, sizeof(*fi) - sizeof(fi->inode));
+
fi->inval_mask = ~0;
- fi->nodeid = 0;
- fi->nlookup = 0;
- fi->attr_version = 0;
- fi->orig_ino = 0;
- fi->state = 0;
- fi->submount_lookup = NULL;
mutex_init(&fi->mutex);
spin_lock_init(&fi->lock);
fi->forget = fuse_alloc_forget();
@@ -586,6 +585,17 @@ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
return 0;
}
+void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid)
+{
+ struct inode *inode;
+
+ inode = fuse_ilookup(fc, nodeid, NULL);
+ if (!inode)
+ return;
+ d_prune_aliases(inode);
+ iput(inode);
+}
+
bool fuse_lock_inode(struct inode *inode)
{
bool locked = false;
@@ -1469,7 +1479,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
wake_up_all(&fc->blocked_waitq);
}
-void fuse_send_init(struct fuse_mount *fm)
+static struct fuse_init_args *fuse_new_init(struct fuse_mount *fm)
{
struct fuse_init_args *ia;
u64 flags;
@@ -1528,10 +1538,30 @@ void fuse_send_init(struct fuse_mount *fm)
ia->args.out_args[0].value = &ia->out;
ia->args.force = true;
ia->args.nocreds = true;
- ia->args.end = process_init_reply;
- if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
- process_init_reply(fm, &ia->args, -ENOTCONN);
+ return ia;
+}
+
+int fuse_send_init(struct fuse_mount *fm)
+{
+ struct fuse_init_args *ia = fuse_new_init(fm);
+ int err;
+
+ if (fm->fc->sync_init) {
+ err = fuse_simple_request(fm, &ia->args);
+ /* Ignore size of init reply */
+ if (err > 0)
+ err = 0;
+ } else {
+ ia->args.end = process_init_reply;
+ err = fuse_simple_background(fm, &ia->args, GFP_KERNEL);
+ if (!err)
+ return 0;
+ }
+ process_init_reply(fm, &ia->args, err);
+ if (fm->fc->conn_error)
+ return -ENOTCONN;
+ return 0;
}
EXPORT_SYMBOL_GPL(fuse_send_init);
@@ -1561,8 +1591,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
if (err)
return err;
- /* fuse does it's own writeback accounting */
- sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
/*
@@ -1821,6 +1849,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
!sb_set_blocksize(sb, PAGE_SIZE))
goto err;
#endif
+ fc->sync_fs = 1;
} else {
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
@@ -1872,8 +1901,12 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
mutex_lock(&fuse_mutex);
err = -EINVAL;
- if (ctx->fudptr && *ctx->fudptr)
- goto err_unlock;
+ if (ctx->fudptr && *ctx->fudptr) {
+ if (*ctx->fudptr == FUSE_DEV_SYNC_INIT)
+ fc->sync_init = 1;
+ else
+ goto err_unlock;
+ }
err = fuse_ctl_add_conn(fc);
if (err)
@@ -1881,8 +1914,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
list_add_tail(&fc->entry, &fuse_conn_list);
sb->s_root = root_dentry;
- if (ctx->fudptr)
+ if (ctx->fudptr) {
*ctx->fudptr = fud;
+ wake_up_all(&fuse_dev_waitq);
+ }
mutex_unlock(&fuse_mutex);
return 0;
@@ -1903,6 +1938,7 @@ EXPORT_SYMBOL_GPL(fuse_fill_super_common);
static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
struct fuse_fs_context *ctx = fsc->fs_private;
+ struct fuse_mount *fm;
int err;
if (!ctx->file || !ctx->rootmode_present ||
@@ -1923,8 +1959,10 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
return err;
/* file->private_data shall be visible on all CPUs after this */
smp_mb();
- fuse_send_init(get_fuse_mount_super(sb));
- return 0;
+
+ fm = get_fuse_mount_super(sb);
+
+ return fuse_send_init(fm);
}
/*
@@ -1985,7 +2023,7 @@ static int fuse_get_tree(struct fs_context *fsc)
* Allow creating a fuse mount with an already initialized fuse
* connection
*/
- fud = READ_ONCE(ctx->file->private_data);
+ fud = __fuse_get_dev(ctx->file);
if (ctx->file->f_op == &fuse_dev_operations && fud) {
fsc->sget_key = fud->fc;
sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c
index c99e285f3183..3728933188f3 100644
--- a/fs/fuse/iomode.c
+++ b/fs/fuse/iomode.c
@@ -177,8 +177,7 @@ static int fuse_file_passthrough_open(struct inode *inode, struct file *file)
(ff->open_flags & ~FOPEN_PASSTHROUGH_MASK))
return -EINVAL;
- fb = fuse_passthrough_open(file, inode,
- ff->args->open_outarg.backing_id);
+ fb = fuse_passthrough_open(file, ff->args->open_outarg.backing_id);
if (IS_ERR(fb))
return PTR_ERR(fb);
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
index eb97ac009e75..72de97c03d0e 100644
--- a/fs/fuse/passthrough.c
+++ b/fs/fuse/passthrough.c
@@ -144,171 +144,12 @@ ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
return backing_file_mmap(backing_file, vma, &ctx);
}
-struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
-{
- if (fb && refcount_inc_not_zero(&fb->count))
- return fb;
- return NULL;
-}
-
-static void fuse_backing_free(struct fuse_backing *fb)
-{
- pr_debug("%s: fb=0x%p\n", __func__, fb);
-
- if (fb->file)
- fput(fb->file);
- put_cred(fb->cred);
- kfree_rcu(fb, rcu);
-}
-
-void fuse_backing_put(struct fuse_backing *fb)
-{
- if (fb && refcount_dec_and_test(&fb->count))
- fuse_backing_free(fb);
-}
-
-void fuse_backing_files_init(struct fuse_conn *fc)
-{
- idr_init(&fc->backing_files_map);
-}
-
-static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb)
-{
- int id;
-
- idr_preload(GFP_KERNEL);
- spin_lock(&fc->lock);
- /* FIXME: xarray might be space inefficient */
- id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC);
- spin_unlock(&fc->lock);
- idr_preload_end();
-
- WARN_ON_ONCE(id == 0);
- return id;
-}
-
-static struct fuse_backing *fuse_backing_id_remove(struct fuse_conn *fc,
- int id)
-{
- struct fuse_backing *fb;
-
- spin_lock(&fc->lock);
- fb = idr_remove(&fc->backing_files_map, id);
- spin_unlock(&fc->lock);
-
- return fb;
-}
-
-static int fuse_backing_id_free(int id, void *p, void *data)
-{
- struct fuse_backing *fb = p;
-
- WARN_ON_ONCE(refcount_read(&fb->count) != 1);
- fuse_backing_free(fb);
- return 0;
-}
-
-void fuse_backing_files_free(struct fuse_conn *fc)
-{
- idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
- idr_destroy(&fc->backing_files_map);
-}
-
-int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
-{
- struct file *file;
- struct super_block *backing_sb;
- struct fuse_backing *fb = NULL;
- int res;
-
- pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags);
-
- /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
- res = -EPERM;
- if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
- goto out;
-
- res = -EINVAL;
- if (map->flags || map->padding)
- goto out;
-
- file = fget_raw(map->fd);
- res = -EBADF;
- if (!file)
- goto out;
-
- /* read/write/splice/mmap passthrough only relevant for regular files */
- res = d_is_dir(file->f_path.dentry) ? -EISDIR : -EINVAL;
- if (!d_is_reg(file->f_path.dentry))
- goto out_fput;
-
- backing_sb = file_inode(file)->i_sb;
- res = -ELOOP;
- if (backing_sb->s_stack_depth >= fc->max_stack_depth)
- goto out_fput;
-
- fb = kmalloc(sizeof(struct fuse_backing), GFP_KERNEL);
- res = -ENOMEM;
- if (!fb)
- goto out_fput;
-
- fb->file = file;
- fb->cred = prepare_creds();
- refcount_set(&fb->count, 1);
-
- res = fuse_backing_id_alloc(fc, fb);
- if (res < 0) {
- fuse_backing_free(fb);
- fb = NULL;
- }
-
-out:
- pr_debug("%s: fb=0x%p, ret=%i\n", __func__, fb, res);
-
- return res;
-
-out_fput:
- fput(file);
- goto out;
-}
-
-int fuse_backing_close(struct fuse_conn *fc, int backing_id)
-{
- struct fuse_backing *fb = NULL;
- int err;
-
- pr_debug("%s: backing_id=%d\n", __func__, backing_id);
-
- /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
- err = -EPERM;
- if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
- goto out;
-
- err = -EINVAL;
- if (backing_id <= 0)
- goto out;
-
- err = -ENOENT;
- fb = fuse_backing_id_remove(fc, backing_id);
- if (!fb)
- goto out;
-
- fuse_backing_put(fb);
- err = 0;
-out:
- pr_debug("%s: fb=0x%p, err=%i\n", __func__, fb, err);
-
- return err;
-}
-
/*
* Setup passthrough to a backing file.
*
* Returns an fb object with elevated refcount to be stored in fuse inode.
*/
-struct fuse_backing *fuse_passthrough_open(struct file *file,
- struct inode *inode,
- int backing_id)
+struct fuse_backing *fuse_passthrough_open(struct file *file, int backing_id)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fm->fc;
@@ -320,12 +161,8 @@ struct fuse_backing *fuse_passthrough_open(struct file *file,
if (backing_id <= 0)
goto out;
- rcu_read_lock();
- fb = idr_find(&fc->backing_files_map, backing_id);
- fb = fuse_backing_get(fb);
- rcu_read_unlock();
-
err = -ENOENT;
+ fb = fuse_backing_lookup(fc, backing_id);
if (!fb)
goto out;
diff --git a/fs/fuse/trace.c b/fs/fuse/trace.c
new file mode 100644
index 000000000000..93bd72efc98c
--- /dev/null
+++ b/fs/fuse/trace.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "dev_uring_i.h"
+#include "fuse_i.h"
+#include "fuse_dev_i.h"
+
+#include <linux/pagemap.h>
+
+#define CREATE_TRACE_POINTS
+#include "fuse_trace.h"
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 76c8fd0bfc75..6bc7c97b017d 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -20,6 +20,7 @@
#include <linux/cleanup.h>
#include <linux/uio.h>
#include "fuse_i.h"
+#include "fuse_dev_i.h"
/* Used to help calculate the FUSE connection's max_pages limit for a request's
* size. Parts of the struct fuse_req are sliced into scattergather lists in
@@ -761,7 +762,6 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
static void virtio_fs_request_complete(struct fuse_req *req,
struct virtio_fs_vq *fsvq)
{
- struct fuse_pqueue *fpq = &fsvq->fud->pq;
struct fuse_args *args;
struct fuse_args_pages *ap;
unsigned int len, i, thislen;
@@ -790,9 +790,7 @@ static void virtio_fs_request_complete(struct fuse_req *req,
}
}
- spin_lock(&fpq->lock);
clear_bit(FR_SENT, &req->flags);
- spin_unlock(&fpq->lock);
fuse_request_end(req);
spin_lock(&fsvq->lock);
@@ -1384,7 +1382,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
unsigned int out_sgs = 0;
unsigned int in_sgs = 0;
unsigned int total_sgs;
- unsigned int i;
+ unsigned int i, hash;
int ret;
bool notify;
struct fuse_pqueue *fpq;
@@ -1444,8 +1442,9 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
/* Request successfully sent. */
fpq = &fsvq->fud->pq;
+ hash = fuse_req_hash(req->in.h.unique);
spin_lock(&fpq->lock);
- list_add_tail(&req->list, fpq->processing);
+ list_add_tail(&req->list, &fpq->processing[hash]);
spin_unlock(&fpq->lock);
set_bit(FR_SENT, &req->flags);
/* matches barrier in request_wait_answer() */
@@ -1480,8 +1479,7 @@ static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
struct virtio_fs_vq *fsvq;
int ret;
- if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
- req->in.h.unique = fuse_get_unique(fiq);
+ fuse_request_assign_unique(fiq, req);
clear_bit(FR_PENDING, &req->flags);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index e721148c95d0..3e64f14739dd 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -66,16 +66,6 @@ static inline void wb_stat_mod(struct bdi_writeback *wb,
percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
}
-static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
-{
- wb_stat_mod(wb, item, 1);
-}
-
-static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
-{
- wb_stat_mod(wb, item, -1);
-}
-
static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
return percpu_counter_read_positive(&wb->stat[item]);
@@ -118,12 +108,10 @@ int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit
*
* BDI_CAP_WRITEBACK: Supports dirty page writeback, and dirty pages
* should contribute to accounting
- * BDI_CAP_WRITEBACK_ACCT: Automatically account writeback pages
* BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold
*/
#define BDI_CAP_WRITEBACK (1 << 0)
-#define BDI_CAP_WRITEBACK_ACCT (1 << 1)
-#define BDI_CAP_STRICTLIMIT (1 << 2)
+#define BDI_CAP_STRICTLIMIT (1 << 1)
extern struct backing_dev_info noop_backing_dev_info;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 09855d819418..f648044466d5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -965,6 +965,18 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
__ret; \
})
+#define __wait_event_state_exclusive(wq, condition, state) \
+ ___wait_event(wq, condition, state, 1, 0, schedule())
+
+#define wait_event_state_exclusive(wq, condition, state) \
+({ \
+ int __ret = 0; \
+ might_sleep(); \
+ if (!(condition)) \
+ __ret = __wait_event_state_exclusive(wq, condition, state); \
+ __ret; \
+})
+
#define __wait_event_killable_timeout(wq_head, condition, timeout) \
___wait_event(wq_head, ___wait_cond_timeout(condition), \
TASK_KILLABLE, 0, timeout, \
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 122d6586e8d4..c13e1f9a2f12 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -235,6 +235,11 @@
*
* 7.44
* - add FUSE_NOTIFY_INC_EPOCH
+ *
+ * 7.45
+ * - add FUSE_COPY_FILE_RANGE_64
+ * - add struct fuse_copy_file_range_out
+ * - add FUSE_NOTIFY_PRUNE
*/
#ifndef _LINUX_FUSE_H
@@ -270,7 +275,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 44
+#define FUSE_KERNEL_MINOR_VERSION 45
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -657,6 +662,7 @@ enum fuse_opcode {
FUSE_SYNCFS = 50,
FUSE_TMPFILE = 51,
FUSE_STATX = 52,
+ FUSE_COPY_FILE_RANGE_64 = 53,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -675,7 +681,7 @@ enum fuse_notify_code {
FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_RESEND = 7,
FUSE_NOTIFY_INC_EPOCH = 8,
- FUSE_NOTIFY_CODE_MAX,
+ FUSE_NOTIFY_PRUNE = 9,
};
/* The read buffer is required to be at least 8k, but may be much larger */
@@ -1114,6 +1120,12 @@ struct fuse_notify_retrieve_in {
uint64_t dummy4;
};
+struct fuse_notify_prune_out {
+ uint32_t count;
+ uint32_t padding;
+ uint64_t spare;
+};
+
struct fuse_backing_map {
int32_t fd;
uint32_t flags;
@@ -1126,6 +1138,7 @@ struct fuse_backing_map {
#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \
struct fuse_backing_map)
#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t)
+#define FUSE_DEV_IOC_SYNC_INIT _IO(FUSE_DEV_IOC_MAGIC, 3)
struct fuse_lseek_in {
uint64_t fh;
@@ -1148,6 +1161,11 @@ struct fuse_copy_file_range_in {
uint64_t flags;
};
+/* For FUSE_COPY_FILE_RANGE_64 */
+struct fuse_copy_file_range_out {
+ uint64_t bytes_copied;
+};
+
#define FUSE_SETUPMAPPING_FLAG_WRITE (1ull << 0)
#define FUSE_SETUPMAPPING_FLAG_READ (1ull << 1)
struct fuse_setupmapping_in {
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 128b525b8811..41b6c9386b69 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1031,7 +1031,7 @@ struct backing_dev_info *bdi_alloc(int node_id)
kfree(bdi);
return NULL;
}
- bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT;
+ bdi->capabilities = BDI_CAP_WRITEBACK;
bdi->ra_pages = VM_READAHEAD_PAGES;
bdi->io_pages = VM_READAHEAD_PAGES;
timer_setup(&bdi->laptop_mode_wb_timer, laptop_mode_timer_fn, 0);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5f90fd6a7137..757bc4d3b5b5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2990,26 +2990,23 @@ bool __folio_end_writeback(struct folio *folio)
if (mapping && mapping_use_writeback_tags(mapping)) {
struct inode *inode = mapping->host;
- struct backing_dev_info *bdi = inode_to_bdi(inode);
+ struct bdi_writeback *wb;
unsigned long flags;
xa_lock_irqsave(&mapping->i_pages, flags);
ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback);
__xa_clear_mark(&mapping->i_pages, folio->index,
PAGECACHE_TAG_WRITEBACK);
- if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
- struct bdi_writeback *wb = inode_to_wb(inode);
- wb_stat_mod(wb, WB_WRITEBACK, -nr);
- __wb_writeout_add(wb, nr);
- if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
- wb_inode_writeback_end(wb);
+ wb = inode_to_wb(inode);
+ wb_stat_mod(wb, WB_WRITEBACK, -nr);
+ __wb_writeout_add(wb, nr);
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+ wb_inode_writeback_end(wb);
+ if (mapping->host)
+ sb_clear_inode_writeback(mapping->host);
}
- if (mapping->host && !mapping_tagged(mapping,
- PAGECACHE_TAG_WRITEBACK))
- sb_clear_inode_writeback(mapping->host);
-
xa_unlock_irqrestore(&mapping->i_pages, flags);
} else {
ret = folio_xor_flags_has_waiters(folio, 1 << PG_writeback);
@@ -3034,7 +3031,7 @@ void __folio_start_writeback(struct folio *folio, bool keep_write)
if (mapping && mapping_use_writeback_tags(mapping)) {
XA_STATE(xas, &mapping->i_pages, folio->index);
struct inode *inode = mapping->host;
- struct backing_dev_info *bdi = inode_to_bdi(inode);
+ struct bdi_writeback *wb;
unsigned long flags;
bool on_wblist;
@@ -3045,21 +3042,19 @@ void __folio_start_writeback(struct folio *folio, bool keep_write)
on_wblist = mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
- if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
- struct bdi_writeback *wb = inode_to_wb(inode);
-
- wb_stat_mod(wb, WB_WRITEBACK, nr);
- if (!on_wblist)
- wb_inode_writeback_start(wb);
+ wb = inode_to_wb(inode);
+ wb_stat_mod(wb, WB_WRITEBACK, nr);
+ if (!on_wblist) {
+ wb_inode_writeback_start(wb);
+ /*
+ * We can come through here when swapping anonymous
+ * folios, so we don't necessarily have an inode to
+ * track for sync.
+ */
+ if (mapping->host)
+ sb_mark_inode_writeback(mapping->host);
}
- /*
- * We can come through here when swapping anonymous
- * folios, so we don't necessarily have an inode to
- * track for sync.
- */
- if (mapping->host && !on_wblist)
- sb_mark_inode_writeback(mapping->host);
if (!folio_test_dirty(folio))
xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
if (!keep_write)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index a2d8e1093b00..36d03860d9d8 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -36,6 +36,7 @@ TARGETS += filesystems/fat
TARGETS += filesystems/overlayfs
TARGETS += filesystems/statmount
TARGETS += filesystems/mount-notify
+TARGETS += filesystems/fuse
TARGETS += firmware
TARGETS += fpu
TARGETS += ftrace
diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore
new file mode 100644
index 000000000000..3e72e742d08e
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+fuse_mnt
+fusectl_test
diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile
new file mode 100644
index 000000000000..612aad69a93a
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := fusectl_test
+TEST_GEN_FILES := fuse_mnt
+
+include ../../lib.mk
+
+VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse
+endif
+
+VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS := -lfuse -pthread
+endif
+
+$(OUTPUT)/fuse_mnt: CFLAGS += $(VAR_CFLAGS)
+$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS)
diff --git a/tools/testing/selftests/filesystems/fuse/fuse_mnt.c b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c
new file mode 100644
index 000000000000..d12b17f30fad
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fusectl test file-system
+ * Creates a simple FUSE filesystem with a single read-write file (/test)
+ */
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static char *content;
+static size_t content_size = 0;
+static const char test_path[] = "/test";
+
+static int test_getattr(const char *path, struct stat *st)
+{
+ memset(st, 0, sizeof(*st));
+
+ if (!strcmp(path, "/")) {
+ st->st_mode = S_IFDIR | 0755;
+ st->st_nlink = 2;
+ return 0;
+ }
+
+ if (!strcmp(path, test_path)) {
+ st->st_mode = S_IFREG | 0664;
+ st->st_nlink = 1;
+ st->st_size = content_size;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int test_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
+ off_t offset, struct fuse_file_info *fi)
+{
+ if (strcmp(path, "/"))
+ return -ENOENT;
+
+ filler(buf, ".", NULL, 0);
+ filler(buf, "..", NULL, 0);
+ filler(buf, test_path + 1, NULL, 0);
+
+ return 0;
+}
+
+static int test_open(const char *path, struct fuse_file_info *fi)
+{
+ if (strcmp(path, test_path))
+ return -ENOENT;
+
+ return 0;
+}
+
+static int test_read(const char *path, char *buf, size_t size, off_t offset,
+ struct fuse_file_info *fi)
+{
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if (!content || content_size == 0)
+ return 0;
+
+ if (offset >= content_size)
+ return 0;
+
+ if (offset + size > content_size)
+ size = content_size - offset;
+
+ memcpy(buf, content + offset, size);
+
+ return size;
+}
+
+static int test_write(const char *path, const char *buf, size_t size,
+ off_t offset, struct fuse_file_info *fi)
+{
+ size_t new_size;
+
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if(offset > content_size)
+ return -EINVAL;
+
+ new_size = MAX(offset + size, content_size);
+
+ if (new_size > content_size)
+ content = realloc(content, new_size);
+
+ content_size = new_size;
+
+ if (!content)
+ return -ENOMEM;
+
+ memcpy(content + offset, buf, size);
+
+ return size;
+}
+
+static int test_truncate(const char *path, off_t size)
+{
+ if (strcmp(path, test_path) != 0)
+ return -ENOENT;
+
+ if (size == 0) {
+ free(content);
+ content = NULL;
+ content_size = 0;
+ return 0;
+ }
+
+ content = realloc(content, size);
+
+ if (!content)
+ return -ENOMEM;
+
+ if (size > content_size)
+ memset(content + content_size, 0, size - content_size);
+
+ content_size = size;
+ return 0;
+}
+
+static struct fuse_operations memfd_ops = {
+ .getattr = test_getattr,
+ .readdir = test_readdir,
+ .open = test_open,
+ .read = test_read,
+ .write = test_write,
+ .truncate = test_truncate,
+};
+
+int main(int argc, char *argv[])
+{
+ return fuse_main(argc, argv, &memfd_ops, NULL);
+}
diff --git a/tools/testing/selftests/filesystems/fuse/fusectl_test.c b/tools/testing/selftests/filesystems/fuse/fusectl_test.c
new file mode 100644
index 000000000000..8d124d1cacb2
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/fusectl_test.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Chen Linxuan <chenlinxuan@uniontech.com>
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sched.h>
+#include <linux/limits.h>
+
+#include "../../kselftest_harness.h"
+
+#define FUSECTL_MOUNTPOINT "/sys/fs/fuse/connections"
+#define FUSE_MOUNTPOINT "/tmp/fuse_mnt_XXXXXX"
+#define FUSE_DEVICE "/dev/fuse"
+#define FUSECTL_TEST_VALUE "1"
+
+static void write_file(struct __test_metadata *const _metadata,
+ const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+
+ ASSERT_GE(fd, 0);
+ ASSERT_EQ(write(fd, val, len), len);
+ ASSERT_EQ(close(fd), 0);
+}
+
+FIXTURE(fusectl){
+ char fuse_mountpoint[sizeof(FUSE_MOUNTPOINT)];
+ int connection;
+};
+
+FIXTURE_SETUP(fusectl)
+{
+ const char *fuse_mnt_prog = "./fuse_mnt";
+ int status, pid;
+ struct stat statbuf;
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+ char buf[32];
+
+ /* Setup userns */
+ ASSERT_EQ(unshare(CLONE_NEWNS|CLONE_NEWUSER), 0);
+ sprintf(buf, "0 %d 1", uid);
+ write_file(_metadata, "/proc/self/uid_map", buf);
+ write_file(_metadata, "/proc/self/setgroups", "deny");
+ sprintf(buf, "0 %d 1", gid);
+ write_file(_metadata, "/proc/self/gid_map", buf);
+ ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
+
+ strcpy(self->fuse_mountpoint, FUSE_MOUNTPOINT);
+
+ if (!mkdtemp(self->fuse_mountpoint))
+ SKIP(return,
+ "Failed to create FUSE mountpoint %s",
+ strerror(errno));
+
+ if (access(FUSECTL_MOUNTPOINT, F_OK))
+ SKIP(return,
+ "FUSE control filesystem not mounted");
+
+ pid = fork();
+ if (pid < 0)
+ SKIP(return,
+ "Failed to fork FUSE daemon process: %s",
+ strerror(errno));
+
+ if (pid == 0) {
+ execlp(fuse_mnt_prog, fuse_mnt_prog, self->fuse_mountpoint, NULL);
+ exit(errno);
+ }
+
+ waitpid(pid, &status, 0);
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ SKIP(return,
+ "Failed to start FUSE daemon %s",
+ strerror(WEXITSTATUS(status)));
+ }
+
+ if (stat(self->fuse_mountpoint, &statbuf))
+ SKIP(return,
+ "Failed to stat FUSE mountpoint %s",
+ strerror(errno));
+
+ self->connection = statbuf.st_dev;
+}
+
+FIXTURE_TEARDOWN(fusectl)
+{
+ umount2(self->fuse_mountpoint, MNT_DETACH);
+ rmdir(self->fuse_mountpoint);
+}
+
+TEST_F(fusectl, abort)
+{
+ char path_buf[PATH_MAX];
+ int abort_fd, test_fd, ret;
+
+ sprintf(path_buf, "/sys/fs/fuse/connections/%d/abort", self->connection);
+
+ ASSERT_EQ(0, access(path_buf, F_OK));
+
+ abort_fd = open(path_buf, O_WRONLY);
+ ASSERT_GE(abort_fd, 0);
+
+ sprintf(path_buf, "%s/test", self->fuse_mountpoint);
+
+ test_fd = open(path_buf, O_RDWR);
+ ASSERT_GE(test_fd, 0);
+
+ ret = read(test_fd, path_buf, sizeof(path_buf));
+ ASSERT_EQ(ret, 0);
+
+ ret = write(test_fd, "test", sizeof("test"));
+ ASSERT_EQ(ret, sizeof("test"));
+
+ ret = lseek(test_fd, 0, SEEK_SET);
+ ASSERT_GE(ret, 0);
+
+ ret = write(abort_fd, FUSECTL_TEST_VALUE, sizeof(FUSECTL_TEST_VALUE));
+ ASSERT_GT(ret, 0);
+
+ close(abort_fd);
+
+ ret = read(test_fd, path_buf, sizeof(path_buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOTCONN);
+}
+
+TEST_HARNESS_MAIN