summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- fs/9p/vfs_addr.c | 6
-rw-r--r-- fs/afs/Makefile | 1
-rw-r--r-- fs/afs/callback.c | 4
-rw-r--r-- fs/afs/dir.c | 809
-rw-r--r-- fs/afs/dir_edit.c | 383
-rw-r--r-- fs/afs/dir_search.c | 227
-rw-r--r-- fs/afs/file.c | 260
-rw-r--r-- fs/afs/fs_operation.c | 113
-rw-r--r-- fs/afs/fsclient.c | 62
-rw-r--r-- fs/afs/inode.c | 140
-rw-r--r-- fs/afs/internal.h | 143
-rw-r--r-- fs/afs/main.c | 2
-rw-r--r-- fs/afs/mntpt.c | 22
-rw-r--r-- fs/afs/rotate.c | 4
-rw-r--r-- fs/afs/rxrpc.c | 37
-rw-r--r-- fs/afs/super.c | 4
-rw-r--r-- fs/afs/validation.c | 31
-rw-r--r-- fs/afs/vlclient.c | 1
-rw-r--r-- fs/afs/write.c | 16
-rw-r--r-- fs/afs/xdr_fs.h | 2
-rw-r--r-- fs/afs/yfsclient.c | 49
-rw-r--r-- fs/cachefiles/io.c | 4
-rw-r--r-- fs/cachefiles/xattr.c | 9
-rw-r--r-- fs/ceph/addr.c | 22
-rw-r--r-- fs/netfs/Makefile | 5
-rw-r--r-- fs/netfs/buffered_read.c | 290
-rw-r--r-- fs/netfs/direct_read.c | 78
-rw-r--r-- fs/netfs/direct_write.c | 10
-rw-r--r-- fs/netfs/internal.h | 41
-rw-r--r-- fs/netfs/main.c | 6
-rw-r--r-- fs/netfs/misc.c | 164
-rw-r--r-- fs/netfs/objects.c | 21
-rw-r--r-- fs/netfs/read_collect.c | 761
-rw-r--r-- fs/netfs/read_pgpriv2.c | 207
-rw-r--r-- fs/netfs/read_retry.c | 209
-rw-r--r-- fs/netfs/read_single.c | 195
-rw-r--r-- fs/netfs/rolling_buffer.c | 226
-rw-r--r-- fs/netfs/stats.c | 4
-rw-r--r-- fs/netfs/write_collect.c | 281
-rw-r--r-- fs/netfs/write_issue.c | 241
-rw-r--r-- fs/netfs/write_retry.c | 232
-rw-r--r-- fs/nfs/fscache.c | 6
-rw-r--r-- fs/nfs/fscache.h | 3
-rw-r--r-- fs/smb/client/cifssmb.c | 12
-rw-r--r-- fs/smb/client/file.c | 3
-rw-r--r-- fs/smb/client/smb2ops.c | 2
-rw-r--r-- fs/smb/client/smb2pdu.c | 15
-rw-r--r-- include/linux/folio_queue.h | 12
-rw-r--r-- include/linux/netfs.h | 54
-rw-r--r-- include/linux/rolling_buffer.h | 61
-rw-r--r-- include/trace/events/afs.h | 210
-rw-r--r-- include/trace/events/cachefiles.h | 185
-rw-r--r-- include/trace/events/netfs.h | 229
-rw-r--r-- lib/kunit_iov_iter.c | 4
54 files changed, 3911 insertions, 2207 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 3bc9ce6c575e..32619d146cbc 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -81,13 +81,13 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (pos + total >= i_size_read(rreq->inode))
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
-
- if (!err) {
+ if (!err && total) {
subreq->transferred += total;
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
}
- netfs_read_subreq_terminated(subreq, err, false);
+ subreq->error = err;
+ netfs_read_subreq_terminated(subreq);
}
/**
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index dcdc0f1bb76f..5efd7e13b304 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -11,6 +11,7 @@ kafs-y := \
cmservice.o \
dir.o \
dir_edit.o \
+ dir_search.o \
dir_silly.o \
dynroot.o \
file.o \
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 99b2c8172021..69e1dd55b160 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -41,7 +41,7 @@ static void afs_volume_init_callback(struct afs_volume *volume)
list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) {
if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) {
- atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_vol_init_cb);
queue_work(system_unbound_wq, &vnode->cb_work);
}
}
@@ -79,7 +79,7 @@ void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reas
_enter("");
clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
- if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE) {
+ if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_cb_break)) {
vnode->cb_break++;
vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
afs_clear_permits(vnode);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ada363af5aab..a843c36fc471 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -13,6 +13,7 @@
#include <linux/ctype.h>
#include <linux/sched.h>
#include <linux/iversion.h>
+#include <linux/iov_iter.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
#include "afs_fs.h"
@@ -42,15 +43,6 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags);
-static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags);
-static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
- size_t length);
-
-static bool afs_dir_dirty_folio(struct address_space *mapping,
- struct folio *folio)
-{
- BUG(); /* This should never happen. */
-}
const struct file_operations afs_dir_file_operations = {
.open = afs_dir_open,
@@ -75,10 +67,7 @@ const struct inode_operations afs_dir_inode_operations = {
};
const struct address_space_operations afs_dir_aops = {
- .dirty_folio = afs_dir_dirty_folio,
- .release_folio = afs_dir_release_folio,
- .invalidate_folio = afs_dir_invalidate_folio,
- .migrate_folio = filemap_migrate_folio,
+ .writepages = afs_single_writepages,
};
const struct dentry_operations afs_fs_dentry_operations = {
@@ -99,152 +88,124 @@ struct afs_lookup_one_cookie {
struct afs_lookup_cookie {
struct dir_context ctx;
struct qstr name;
- bool found;
- bool one_only;
unsigned short nr_fids;
struct afs_fid fids[50];
};
+static void afs_dir_unuse_cookie(struct afs_vnode *dvnode, int ret)
+{
+ if (ret == 0) {
+ struct afs_vnode_cache_aux aux;
+ loff_t i_size = i_size_read(&dvnode->netfs.inode);
+
+ afs_set_cache_aux(dvnode, &aux);
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), &aux, &i_size);
+ } else {
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+ }
+}
+
/*
- * Drop the refs that we're holding on the folios we were reading into. We've
- * got refs on the first nr_pages pages.
+ * Iterate through a kmapped directory segment, dumping a summary of
+ * the contents.
*/
-static void afs_dir_read_cleanup(struct afs_read *req)
+static size_t afs_dir_dump_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- struct address_space *mapping = req->vnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
+ do {
+ union afs_xdr_dir_block *block = iter_base;
- XA_STATE(xas, &mapping->i_pages, 0);
+ pr_warn("[%05zx] %32phN\n", progress, block);
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ progress += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
- if (unlikely(!req->nr_pages))
- return;
+ return len;
+}
- rcu_read_lock();
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
- BUG_ON(xa_is_value(folio));
- ASSERTCMP(folio->mapping, ==, mapping);
+/*
+ * Dump the contents of a directory.
+ */
+static void afs_dir_dump(struct afs_vnode *dvnode)
+{
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
- folio_put(folio);
- }
+ pr_warn("DIR %llx:%llx is=%llx\n",
+ dvnode->fid.vid, dvnode->fid.vnode, i_size);
- rcu_read_unlock();
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ iterate_folioq(&iter, iov_iter_count(&iter), NULL, NULL,
+ afs_dir_dump_step);
}
/*
* check that a directory folio is valid
*/
-static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio,
- loff_t i_size)
+static bool afs_dir_check_block(struct afs_vnode *dvnode, size_t progress,
+ union afs_xdr_dir_block *block)
{
- union afs_xdr_dir_block *block;
- size_t offset, size;
- loff_t pos;
+ if (block->hdr.magic != AFS_DIR_MAGIC) {
+ pr_warn("%s(%lx): [%zx] bad magic %04x\n",
+ __func__, dvnode->netfs.inode.i_ino,
+ progress, ntohs(block->hdr.magic));
+ trace_afs_dir_check_failed(dvnode, progress);
+ trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
+ return false;
+ }
- /* Determine how many magic numbers there should be in this folio, but
- * we must take care because the directory may change size under us.
+ /* Make sure each block is NUL terminated so we can reasonably
+ * use string functions on it. The filenames in the folio
+ * *should* be NUL-terminated anyway.
*/
- pos = folio_pos(folio);
- if (i_size <= pos)
- goto checked;
-
- size = min_t(loff_t, folio_size(folio), i_size - pos);
- for (offset = 0; offset < size; offset += sizeof(*block)) {
- block = kmap_local_folio(folio, offset);
- if (block->hdr.magic != AFS_DIR_MAGIC) {
- printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
- __func__, dvnode->netfs.inode.i_ino,
- pos, offset, size, ntohs(block->hdr.magic));
- trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
- kunmap_local(block);
- trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
- goto error;
- }
-
- /* Make sure each block is NUL terminated so we can reasonably
- * use string functions on it. The filenames in the folio
- * *should* be NUL-terminated anyway.
- */
- ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
-
- kunmap_local(block);
- }
-checked:
+ ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
afs_stat_v(dvnode, n_read_dir);
return true;
-
-error:
- return false;
}
/*
- * Dump the contents of a directory.
+ * Iterate through a kmapped directory segment, checking the content.
*/
-static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
+static size_t afs_dir_check_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- union afs_xdr_dir_block *block;
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
- size_t offset, size;
-
- XA_STATE(xas, &mapping->i_pages, 0);
-
- pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
- dvnode->fid.vid, dvnode->fid.vnode,
- req->file_size, req->len, req->actual_len);
- pr_warn("DIR %llx %x %zx %zx\n",
- req->pos, req->nr_pages,
- req->iter->iov_offset, iov_iter_count(req->iter));
-
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
+ struct afs_vnode *dvnode = priv;
- BUG_ON(folio->mapping != mapping);
+ if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+ len % AFS_DIR_BLOCK_SIZE))
+ return len;
- size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
- for (offset = 0; offset < size; offset += sizeof(*block)) {
- block = kmap_local_folio(folio, offset);
- pr_warn("[%02lx] %32phN\n", folio->index + offset, block);
- kunmap_local(block);
- }
- }
+ do {
+ if (!afs_dir_check_block(dvnode, progress, iter_base))
+ break;
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
+
+ return len;
}
/*
- * Check all the blocks in a directory. All the folios are held pinned.
+ * Check all the blocks in a directory.
*/
-static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+static int afs_dir_check(struct afs_vnode *dvnode)
{
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct folio *folio;
- pgoff_t last = req->nr_pages - 1;
- int ret = 0;
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
+ size_t checked = 0;
- XA_STATE(xas, &mapping->i_pages, 0);
-
- if (unlikely(!req->nr_pages))
+ if (unlikely(!i_size))
return 0;
- rcu_read_lock();
- xas_for_each(&xas, folio, last) {
- if (xas_retry(&xas, folio))
- continue;
-
- BUG_ON(folio->mapping != mapping);
-
- if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
- afs_dir_dump(dvnode, req);
- ret = -EIO;
- break;
- }
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ checked = iterate_folioq(&iter, iov_iter_count(&iter), dvnode, NULL,
+ afs_dir_check_step);
+ if (checked != i_size) {
+ afs_dir_dump(dvnode);
+ return -EIO;
}
-
- rcu_read_unlock();
- return ret;
+ return 0;
}
/*
@@ -264,134 +225,140 @@ static int afs_dir_open(struct inode *inode, struct file *file)
}
/*
- * Read the directory into the pagecache in one go, scrubbing the previous
- * contents. The list of folios is returned, pinning them so that they don't
- * get reclaimed during the iteration.
+ * Read a file in a single download.
*/
-static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
- __acquires(&dvnode->validate_lock)
+static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
{
- struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- struct afs_read *req;
+ struct iov_iter iter;
+ ssize_t ret;
loff_t i_size;
- int nr_pages, i;
- int ret;
- loff_t remote_size = 0;
-
- _enter("");
+ bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+ !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
- req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- refcount_set(&req->usage, 1);
- req->vnode = dvnode;
- req->key = key_get(key);
- req->cleanup = afs_dir_read_cleanup;
-
-expand:
i_size = i_size_read(&dvnode->netfs.inode);
- if (i_size < remote_size)
- i_size = remote_size;
- if (i_size < 2048) {
- ret = afs_bad(dvnode, afs_file_error_dir_small);
- goto error;
- }
- if (i_size > 2048 * 1024) {
- trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
- ret = -EFBIG;
- goto error;
+ if (is_dir) {
+ if (i_size < AFS_DIR_BLOCK_SIZE)
+ return afs_bad(dvnode, afs_file_error_dir_small);
+ if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
+ }
+ } else {
+ if (i_size > AFSPATHMAX) {
+ trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ return -EFBIG;
+ }
}
- _enter("%llu", i_size);
+ /* Expand the storage. TODO: Shrink the storage too. */
+ if (dvnode->directory_size < i_size) {
+ size_t cur_size = dvnode->directory_size;
- nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
+ ret = netfs_alloc_folioq_buffer(NULL,
+ &dvnode->directory, &cur_size, i_size,
+ mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+ dvnode->directory_size = cur_size;
+ if (ret < 0)
+ return ret;
+ }
- req->actual_len = i_size; /* May change */
- req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
- req->data_version = dvnode->status.data_version; /* May change */
- iov_iter_xarray(&req->def_iter, ITER_DEST, &dvnode->netfs.inode.i_mapping->i_pages,
- 0, i_size);
- req->iter = &req->def_iter;
+ iov_iter_folio_queue(&iter, ITER_DEST, dvnode->directory, 0, 0, dvnode->directory_size);
- /* Fill in any gaps that we might find where the memory reclaimer has
- * been at work and pin all the folios. If there are any gaps, we will
- * need to reread the entire directory contents.
+ /* AFS requires us to perform the read of a directory synchronously as
+ * a single unit to avoid issues with the directory contents being
+ * changed between reads.
*/
- i = req->nr_pages;
- while (i < nr_pages) {
- struct folio *folio;
-
- folio = filemap_get_folio(mapping, i);
- if (IS_ERR(folio)) {
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_inval);
- folio = __filemap_get_folio(mapping,
- i, FGP_LOCK | FGP_CREAT,
- mapping->gfp_mask);
- if (IS_ERR(folio)) {
- ret = PTR_ERR(folio);
- goto error;
- }
- folio_attach_private(folio, (void *)1);
- folio_unlock(folio);
+ ret = netfs_read_single(&dvnode->netfs.inode, file, &iter);
+ if (ret >= 0) {
+ i_size = i_size_read(&dvnode->netfs.inode);
+ if (i_size > ret) {
+ /* The content has grown, so we need to expand the
+ * buffer.
+ */
+ ret = -ESTALE;
+ } else if (is_dir) {
+ int ret2 = afs_dir_check(dvnode);
+
+ if (ret2 < 0)
+ ret = ret2;
+ } else if (i_size < folioq_folio_size(dvnode->directory, 0)) {
+ /* NUL-terminate a symlink. */
+ char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0);
+
+ symlink[i_size] = 0;
+ kunmap_local(symlink);
}
-
- req->nr_pages += folio_nr_pages(folio);
- i += folio_nr_pages(folio);
}
- /* If we're going to reload, we need to lock all the pages to prevent
- * races.
- */
+ return ret;
+}
+
+ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
+{
+ ssize_t ret;
+
+ fscache_use_cookie(afs_vnode_cache(dvnode), false);
+ ret = afs_do_read_single(dvnode, file);
+ fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+ return ret;
+}
+
+/*
+ * Read the directory into a folio_queue buffer in one go, scrubbing the
+ * previous contents. We return -ESTALE if the caller needs to call us again.
+ */
+ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+ __acquires(&dvnode->validate_lock)
+{
+ ssize_t ret;
+ loff_t i_size;
+
+ i_size = i_size_read(&dvnode->netfs.inode);
+
ret = -ERESTARTSYS;
if (down_read_killable(&dvnode->validate_lock) < 0)
goto error;
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- goto success;
+ /* We only need to reread the data if it became invalid - or if we
+ * haven't read it yet.
+ */
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+ ret = i_size;
+ goto valid;
+ }
up_read(&dvnode->validate_lock);
if (down_write_killable(&dvnode->validate_lock) < 0)
goto error;
- if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
- trace_afs_reload_dir(dvnode);
- ret = afs_fetch_data(dvnode, req);
- if (ret < 0)
- goto error_unlock;
-
- task_io_account_read(PAGE_SIZE * req->nr_pages);
-
- if (req->len < req->file_size) {
- /* The content has grown, so we need to expand the
- * buffer.
- */
- up_write(&dvnode->validate_lock);
- remote_size = req->file_size;
- goto expand;
- }
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ afs_invalidate_cache(dvnode, 0);
- /* Validate the data we just read. */
- ret = afs_dir_check(dvnode, req);
+ if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) ||
+ !test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+ trace_afs_reload_dir(dvnode);
+ ret = afs_read_single(dvnode, file);
if (ret < 0)
goto error_unlock;
// TODO: Trim excess pages
set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+ set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+ } else {
+ ret = i_size;
}
downgrade_write(&dvnode->validate_lock);
-success:
- return req;
+valid:
+ return ret;
error_unlock:
up_write(&dvnode->validate_lock);
error:
- afs_put_read(req);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
+ _leave(" = %zd", ret);
+ return ret;
}
/*
@@ -399,79 +366,69 @@ error:
*/
static int afs_dir_iterate_block(struct afs_vnode *dvnode,
struct dir_context *ctx,
- union afs_xdr_dir_block *block,
- unsigned blkoff)
+ union afs_xdr_dir_block *block)
{
union afs_xdr_dirent *dire;
- unsigned offset, next, curr, nr_slots;
+ unsigned int blknum, base, hdr, pos, next, nr_slots;
size_t nlen;
int tmp;
- _enter("%llx,%x", ctx->pos, blkoff);
+ blknum = ctx->pos / AFS_DIR_BLOCK_SIZE;
+ base = blknum * AFS_DIR_SLOTS_PER_BLOCK;
+ hdr = (blknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+ pos = DIV_ROUND_UP(ctx->pos, AFS_DIR_DIRENT_SIZE) - base;
- curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
+ _enter("%llx,%x", ctx->pos, blknum);
/* walk through the block, an entry at a time */
- for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
- offset < AFS_DIR_SLOTS_PER_BLOCK;
- offset = next
- ) {
+ for (unsigned int slot = hdr; slot < AFS_DIR_SLOTS_PER_BLOCK; slot = next) {
/* skip entries marked unused in the bitmap */
- if (!(block->hdr.bitmap[offset / 8] &
- (1 << (offset % 8)))) {
- _debug("ENT[%zu.%u]: unused",
- blkoff / sizeof(union afs_xdr_dir_block), offset);
- next = offset + 1;
- if (offset >= curr)
- ctx->pos = blkoff +
- next * sizeof(union afs_xdr_dirent);
+ if (!(block->hdr.bitmap[slot / 8] &
+ (1 << (slot % 8)))) {
+ _debug("ENT[%x]: Unused", base + slot);
+ next = slot + 1;
+ if (next >= pos)
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
continue;
}
/* got a valid entry */
- dire = &block->dirents[offset];
+ dire = &block->dirents[slot];
nlen = strnlen(dire->u.name,
- sizeof(*block) -
- offset * sizeof(union afs_xdr_dirent));
+ (unsigned long)(block + 1) - (unsigned long)dire->u.name - 1);
if (nlen > AFSNAMEMAX - 1) {
- _debug("ENT[%zu]: name too long (len %u/%zu)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, nlen);
+ _debug("ENT[%x]: Name too long (len %zx)",
+ base + slot, nlen);
return afs_bad(dvnode, afs_file_error_dir_name_too_long);
}
- _debug("ENT[%zu.%u]: %s %zu \"%s\"",
- blkoff / sizeof(union afs_xdr_dir_block), offset,
- (offset < curr ? "skip" : "fill"),
+ _debug("ENT[%x]: %s %zx \"%s\"",
+ base + slot, (slot < pos ? "skip" : "fill"),
nlen, dire->u.name);
nr_slots = afs_dir_calc_slots(nlen);
- next = offset + nr_slots;
+ next = slot + nr_slots;
if (next > AFS_DIR_SLOTS_PER_BLOCK) {
- _debug("ENT[%zu.%u]:"
- " %u extends beyond end dir block"
- " (len %zu)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, next, nlen);
+ _debug("ENT[%x]: extends beyond end dir block (len %zx)",
+ base + slot, nlen);
return afs_bad(dvnode, afs_file_error_dir_over_end);
}
/* Check that the name-extension dirents are all allocated */
for (tmp = 1; tmp < nr_slots; tmp++) {
- unsigned int ix = offset + tmp;
- if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) {
- _debug("ENT[%zu.u]:"
- " %u unmarked extension (%u/%u)",
- blkoff / sizeof(union afs_xdr_dir_block),
- offset, tmp, nr_slots);
+ unsigned int xslot = slot + tmp;
+
+ if (!(block->hdr.bitmap[xslot / 8] & (1 << (xslot % 8)))) {
+ _debug("ENT[%x]: Unmarked extension (%x/%x)",
+ base + slot, tmp, nr_slots);
return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
}
}
/* skip if starts before the current position */
- if (offset < curr) {
- if (next > curr)
- ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ if (slot < pos) {
+ if (next > pos)
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
continue;
}
@@ -485,75 +442,110 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
return 0;
}
- ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
}
_leave(" = 1 [more]");
return 1;
}
+struct afs_dir_iteration_ctx {
+ struct dir_context *dir_ctx;
+ int error;
+};
+
/*
- * iterate through the data blob that lists the contents of an AFS directory
+ * Iterate through a kmapped directory segment.
*/
-static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
- struct key *key, afs_dataversion_t *_dir_version)
+static size_t afs_dir_iterate_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
{
- struct afs_vnode *dvnode = AFS_FS_I(dir);
- union afs_xdr_dir_block *dblock;
- struct afs_read *req;
- struct folio *folio;
- unsigned offset, size;
+ struct afs_dir_iteration_ctx *ctx = priv2;
+ struct afs_vnode *dvnode = priv;
int ret;
- _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
-
- if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
- _leave(" = -ESTALE");
- return -ESTALE;
+ if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+ len % AFS_DIR_BLOCK_SIZE)) {
+ pr_err("Mis-iteration prog=%zx len=%zx\n",
+ progress % AFS_DIR_BLOCK_SIZE,
+ len % AFS_DIR_BLOCK_SIZE);
+ return len;
}
- req = afs_read_dir(dvnode, key);
- if (IS_ERR(req))
- return PTR_ERR(req);
- *_dir_version = req->data_version;
+ do {
+ ret = afs_dir_iterate_block(dvnode, ctx->dir_ctx, iter_base);
+ if (ret != 1)
+ break;
- /* round the file position up to the next entry boundary */
- ctx->pos += sizeof(union afs_xdr_dirent) - 1;
- ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1);
+ ctx->dir_ctx->pos = round_up(ctx->dir_ctx->pos, AFS_DIR_BLOCK_SIZE);
+ iter_base += AFS_DIR_BLOCK_SIZE;
+ len -= AFS_DIR_BLOCK_SIZE;
+ } while (len > 0);
- /* walk through the blocks in sequence */
- ret = 0;
- while (ctx->pos < req->actual_len) {
- /* Fetch the appropriate folio from the directory and re-add it
- * to the LRU. We have all the pages pinned with an extra ref.
- */
- folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE,
- FGP_ACCESSED, 0);
- if (IS_ERR(folio)) {
- ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
- break;
- }
+ return len;
+}
- offset = round_down(ctx->pos, sizeof(*dblock)) - folio_pos(folio);
- size = min_t(loff_t, folio_size(folio),
- req->actual_len - folio_pos(folio));
+/*
+ * Iterate through the directory folios.
+ */
+static int afs_dir_iterate_contents(struct inode *dir, struct dir_context *dir_ctx)
+{
+ struct afs_dir_iteration_ctx ctx = { .dir_ctx = dir_ctx };
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ struct iov_iter iter;
+ unsigned long long i_size = i_size_read(dir);
- do {
- dblock = kmap_local_folio(folio, offset);
- ret = afs_dir_iterate_block(dvnode, ctx, dblock,
- folio_pos(folio) + offset);
- kunmap_local(dblock);
- if (ret != 1)
- goto out;
+ /* Round the file position up to the next entry boundary */
+ dir_ctx->pos = round_up(dir_ctx->pos, sizeof(union afs_xdr_dirent));
- } while (offset += sizeof(*dblock), offset < size);
+ if (i_size <= 0 || dir_ctx->pos >= i_size)
+ return 0;
- ret = 0;
- }
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ iov_iter_advance(&iter, round_down(dir_ctx->pos, AFS_DIR_BLOCK_SIZE));
+
+ iterate_folioq(&iter, iov_iter_count(&iter), dvnode, &ctx,
+ afs_dir_iterate_step);
+
+ if (ctx.error == -ESTALE)
+ afs_invalidate_dir(dvnode, afs_dir_invalid_iter_stale);
+ return ctx.error;
+}
+
+/*
+ * iterate through the data blob that lists the contents of an AFS directory
+ */
+static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
+ struct file *file, afs_dataversion_t *_dir_version)
+{
+ struct afs_vnode *dvnode = AFS_FS_I(dir);
+ int retry_limit = 100;
+ int ret;
+
+ _enter("{%lu},%llx,,", dir->i_ino, ctx->pos);
+
+ do {
+ if (--retry_limit < 0) {
+ pr_warn("afs_read_dir(): Too many retries\n");
+ ret = -ESTALE;
+ break;
+ }
+ ret = afs_read_dir(dvnode, file);
+ if (ret < 0) {
+ if (ret != -ESTALE)
+ break;
+ if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
+ ret = -ESTALE;
+ break;
+ }
+ continue;
+ }
+ *_dir_version = inode_peek_iversion_raw(dir);
+
+ ret = afs_dir_iterate_contents(dir, ctx);
+ up_read(&dvnode->validate_lock);
+ } while (ret == -ESTALE);
-out:
- up_read(&dvnode->validate_lock);
- afs_put_read(req);
_leave(" = %d", ret);
return ret;
}
@@ -565,8 +557,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx)
{
afs_dataversion_t dir_version;
- return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file),
- &dir_version);
+ return afs_dir_iterate(file_inode(file), ctx, file, &dir_version);
}
/*
@@ -607,7 +598,7 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
* - just returns the FID the dentry name maps to if found
*/
static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
- struct afs_fid *fid, struct key *key,
+ struct afs_fid *fid,
afs_dataversion_t *_dir_version)
{
struct afs_super_info *as = dir->i_sb->s_fs_info;
@@ -621,7 +612,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
/* search the directory */
- ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version);
+ ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version);
if (ret < 0) {
_leave(" = %d [iter]", ret);
return ret;
@@ -656,19 +647,10 @@ static bool afs_lookup_filldir(struct dir_context *ctx, const char *name,
BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
- if (cookie->found) {
- if (cookie->nr_fids < 50) {
- cookie->fids[cookie->nr_fids].vnode = ino;
- cookie->fids[cookie->nr_fids].unique = dtype;
- cookie->nr_fids++;
- }
- } else if (cookie->name.len == nlen &&
- memcmp(cookie->name.name, name, nlen) == 0) {
- cookie->fids[1].vnode = ino;
- cookie->fids[1].unique = dtype;
- cookie->found = 1;
- if (cookie->one_only)
- return false;
+ if (cookie->nr_fids < 50) {
+ cookie->fids[cookie->nr_fids].vnode = ino;
+ cookie->fids[cookie->nr_fids].unique = dtype;
+ cookie->nr_fids++;
}
return cookie->nr_fids < 50;
@@ -788,8 +770,7 @@ static bool afs_server_supports_ibulk(struct afs_vnode *dvnode)
* files in one go and create inodes for them. The inode of the file we were
* asked for is returned.
*/
-static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
- struct key *key)
+static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
{
struct afs_lookup_cookie *cookie;
struct afs_vnode_param *vp;
@@ -797,6 +778,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
struct inode *inode = NULL, *ti;
afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
+ bool supports_ibulk;
long ret;
int i;
@@ -813,19 +795,19 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
cookie->nr_fids = 2; /* slot 1 is saved for the fid we actually want
* and slot 0 for the directory */
- if (!afs_server_supports_ibulk(dvnode))
- cookie->one_only = true;
-
- /* search the directory */
- ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
+ /* Search the directory for the named entry using the hash table... */
+ ret = afs_dir_search(dvnode, &dentry->d_name, &cookie->fids[1], &data_version);
if (ret < 0)
goto out;
- dentry->d_fsdata = (void *)(unsigned long)data_version;
+ supports_ibulk = afs_server_supports_ibulk(dvnode);
+ if (supports_ibulk) {
+ /* ...then scan linearly from that point for entries to lookup-ahead. */
+ cookie->ctx.pos = (ret + 1) * AFS_DIR_DIRENT_SIZE;
+ afs_dir_iterate(dir, &cookie->ctx, NULL, &data_version);
+ }
- ret = -ENOENT;
- if (!cookie->found)
- goto out;
+ dentry->d_fsdata = (void *)(unsigned long)data_version;
/* Check to see if we already have an inode for the primary fid. */
inode = ilookup5(dir->i_sb, cookie->fids[1].vnode,
@@ -884,7 +866,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
* the whole operation.
*/
afs_op_set_error(op, -ENOTSUPP);
- if (!cookie->one_only) {
+ if (supports_ibulk) {
op->ops = &afs_inline_bulk_status_operation;
afs_begin_vnode_operation(op);
afs_wait_for_operation(op);
@@ -926,8 +908,7 @@ out:
/*
* Look up an entry in a directory with @sys substitution.
*/
-static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry,
- struct key *key)
+static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry)
{
struct afs_sysnames *subs;
struct afs_net *net = afs_i2net(dir);
@@ -975,7 +956,6 @@ out_s:
afs_put_sysnames(subs);
kfree(buf);
out_p:
- key_put(key);
return ret;
}
@@ -989,7 +969,6 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
struct afs_fid fid = {};
struct inode *inode;
struct dentry *d;
- struct key *key;
int ret;
_enter("{%llx:%llu},%p{%pd},",
@@ -1007,15 +986,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ESTALE);
}
- key = afs_request_key(dvnode->volume->cell);
- if (IS_ERR(key)) {
- _leave(" = %ld [key]", PTR_ERR(key));
- return ERR_CAST(key);
- }
-
- ret = afs_validate(dvnode, key);
+ ret = afs_validate(dvnode, NULL);
if (ret < 0) {
- key_put(key);
+ afs_dir_unuse_cookie(dvnode, ret);
_leave(" = %d [val]", ret);
return ERR_PTR(ret);
}
@@ -1025,11 +998,10 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
dentry->d_name.name[dentry->d_name.len - 3] == 's' &&
dentry->d_name.name[dentry->d_name.len - 2] == 'y' &&
dentry->d_name.name[dentry->d_name.len - 1] == 's')
- return afs_lookup_atsys(dir, dentry, key);
+ return afs_lookup_atsys(dir, dentry);
afs_stat_v(dvnode, n_lookup);
- inode = afs_do_lookup(dir, dentry, key);
- key_put(key);
+ inode = afs_do_lookup(dir, dentry);
if (inode == ERR_PTR(-ENOENT))
inode = afs_try_auto_mntpt(dentry, dir);
@@ -1155,7 +1127,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
afs_stat_v(dir, n_reval);
/* search the directory for this vnode */
- ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, key, &dir_version);
+ ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, &dir_version);
switch (ret) {
case 0:
/* the filename maps to something */
@@ -1282,6 +1254,7 @@ void afs_check_for_remote_deletion(struct afs_operation *op)
*/
static void afs_vnode_new_inode(struct afs_operation *op)
{
+ struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode_param *vp = &op->file[1];
struct afs_vnode *vnode;
struct inode *inode;
@@ -1301,6 +1274,10 @@ static void afs_vnode_new_inode(struct afs_operation *op)
vnode = AFS_FS_I(inode);
set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+ if (S_ISDIR(inode->i_mode))
+ afs_mkdir_init_dir(vnode, dvp->vnode);
+ else if (S_ISLNK(inode->i_mode))
+ afs_init_new_symlink(vnode, op);
if (!afs_op_error(op))
afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb);
d_instantiate(op->dentry, inode);
@@ -1317,18 +1294,21 @@ static void afs_create_success(struct afs_operation *op)
static void afs_create_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode_param *vp = &op->file[1];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid,
op->create.reason);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_create_put(struct afs_operation *op)
@@ -1356,6 +1336,7 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
{
struct afs_operation *op;
struct afs_vnode *dvnode = AFS_FS_I(dir);
+ int ret;
_enter("{%llx:%llu},{%pd},%ho",
dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
@@ -1366,6 +1347,8 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
return PTR_ERR(op);
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1375,7 +1358,9 @@ static int afs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
op->create.reason = afs_edit_dir_for_mkdir;
op->mtime = current_time(dir);
op->ops = &afs_mkdir_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
}
/*
@@ -1388,8 +1373,8 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
clear_nlink(&vnode->netfs.inode);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_rmdir);
+ afs_invalidate_dir(vnode, afs_dir_invalid_subdir_removed);
}
}
@@ -1403,18 +1388,21 @@ static void afs_rmdir_success(struct afs_operation *op)
static void afs_rmdir_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
afs_dir_remove_subdir(op->dentry);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_remove(dvnode, &op->dentry->d_name,
afs_edit_dir_for_rmdir);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_rmdir_put(struct afs_operation *op)
@@ -1449,6 +1437,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1472,10 +1462,18 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
op->file[1].vnode = vnode;
}
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+
+ /* Not all systems that can host afs servers have ENOTEMPTY. */
+ if (ret == -EEXIST)
+ ret = -ENOTEMPTY;
+out:
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
- return afs_put_operation(op);
+ ret = afs_put_operation(op);
+ goto out;
}
/*
@@ -1538,16 +1536,19 @@ static void afs_unlink_success(struct afs_operation *op)
static void afs_unlink_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources cres = {};
struct afs_vnode_param *dvp = &op->file[0];
struct afs_vnode *dvnode = dvp->vnode;
_enter("op=%08x", op->debug_id);
+ fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
afs_edit_dir_remove(dvnode, &op->dentry->d_name,
afs_edit_dir_for_unlink);
up_write(&dvnode->validate_lock);
+ fscache_end_operation(&cres);
}
static void afs_unlink_put(struct afs_operation *op)
@@ -1586,6 +1587,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1632,10 +1635,10 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
afs_wait_for_operation(op);
}
- return afs_put_operation(op);
-
error:
- return afs_put_operation(op);
+ ret = afs_put_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
}
static const struct afs_operation_ops afs_create_operation = {
@@ -1669,6 +1672,8 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
op->file[0].modification = true;
@@ -1679,7 +1684,9 @@ static int afs_create(struct mnt_idmap *idmap, struct inode *dir,
op->create.reason = afs_edit_dir_for_create;
op->mtime = current_time(dir);
op->ops = &afs_create_operation;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
d_drop(dentry);
@@ -1744,6 +1751,8 @@ static int afs_link(struct dentry *from, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
ret = afs_validate(vnode, op->key);
if (ret < 0)
goto error_op;
@@ -1759,10 +1768,13 @@ static int afs_link(struct dentry *from, struct inode *dir,
op->dentry_2 = from;
op->ops = &afs_link_operation;
op->create.reason = afs_edit_dir_for_link;
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error_op:
afs_put_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
error:
d_drop(dentry);
_leave(" = %d", ret);
@@ -1806,6 +1818,8 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
goto error;
}
+ fscache_use_cookie(afs_vnode_cache(dvnode), true);
+
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
@@ -1814,7 +1828,9 @@ static int afs_symlink(struct mnt_idmap *idmap, struct inode *dir,
op->create.reason = afs_edit_dir_for_symlink;
op->create.symlink = content;
op->mtime = current_time(dir);
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+ afs_dir_unuse_cookie(dvnode, ret);
+ return ret;
error:
d_drop(dentry);
@@ -1846,6 +1862,7 @@ static void afs_rename_success(struct afs_operation *op)
write_seqlock(&vnode->cb_lock);
new_dv = vnode->status.data_version + 1;
+ trace_afs_set_dv(vnode, new_dv);
vnode->status.data_version = new_dv;
inode_set_iversion_raw(&vnode->netfs.inode, new_dv);
@@ -1855,6 +1872,7 @@ static void afs_rename_success(struct afs_operation *op)
static void afs_rename_edit_dir(struct afs_operation *op)
{
+ struct netfs_cache_resources orig_cres = {}, new_cres = {};
struct afs_vnode_param *orig_dvp = &op->file[0];
struct afs_vnode_param *new_dvp = &op->file[1];
struct afs_vnode *orig_dvnode = orig_dvp->vnode;
@@ -1871,6 +1889,10 @@ static void afs_rename_edit_dir(struct afs_operation *op)
op->rename.rehash = NULL;
}
+ fscache_begin_write_operation(&orig_cres, afs_vnode_cache(orig_dvnode));
+ if (new_dvnode != orig_dvnode)
+ fscache_begin_write_operation(&new_cres, afs_vnode_cache(new_dvnode));
+
down_write(&orig_dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta)
@@ -1920,6 +1942,9 @@ static void afs_rename_edit_dir(struct afs_operation *op)
d_move(old_dentry, new_dentry);
up_write(&new_dvnode->validate_lock);
+ fscache_end_operation(&orig_cres);
+ if (new_dvnode != orig_dvnode)
+ fscache_end_operation(&new_cres);
}
static void afs_rename_put(struct afs_operation *op)
@@ -1972,6 +1997,10 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (IS_ERR(op))
return PTR_ERR(op);
+ fscache_use_cookie(afs_vnode_cache(orig_dvnode), true);
+ if (new_dvnode != orig_dvnode)
+ fscache_use_cookie(afs_vnode_cache(new_dvnode), true);
+
ret = afs_validate(vnode, op->key);
afs_op_set_error(op, ret);
if (ret < 0)
@@ -2039,47 +2068,43 @@ static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
*/
d_drop(old_dentry);
- return afs_do_sync_operation(op);
+ ret = afs_do_sync_operation(op);
+out:
+ afs_dir_unuse_cookie(orig_dvnode, ret);
+ if (new_dvnode != orig_dvnode)
+ afs_dir_unuse_cookie(new_dvnode, ret);
+ return ret;
error:
- return afs_put_operation(op);
-}
-
-/*
- * Release a directory folio and clean up its private state if it's not busy
- * - return true if the folio can now be released, false if not
- */
-static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags)
-{
- struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
-
- _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio->index);
-
- folio_detach_private(folio);
-
- /* The directory will need reloading. */
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_relpg);
- return true;
+ ret = afs_put_operation(op);
+ goto out;
}
/*
- * Invalidate part or all of a folio.
+ * Write the file contents to the cache as a single blob.
*/
-static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
- size_t length)
+int afs_single_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
- struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
-
- _enter("{%lu},%zu,%zu", folio->index, offset, length);
-
- BUG_ON(!folio_test_locked(folio));
+ struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
+ struct iov_iter iter;
+ bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+ !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
+ int ret = 0;
- /* The directory will need reloading. */
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_stat_v(dvnode, n_inval);
+ /* Need to lock to prevent the folio queue and folios from being thrown
+ * away.
+ */
+ down_read(&dvnode->validate_lock);
+
+ if (is_dir ?
+ test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) :
+ atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
+ iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
+ i_size_read(&dvnode->netfs.inode));
+ ret = netfs_writeback_single(mapping, wbc, &iter);
+ }
- /* we clean up only if the entire folio is being invalidated */
- if (offset == 0 && length == folio_size(folio))
- folio_detach_private(folio);
+ up_read(&dvnode->validate_lock);
+ return ret;
}
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index fe223fb78111..60a549f1d9c5 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -10,6 +10,7 @@
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/iversion.h>
+#include <linux/folio_queue.h>
#include "internal.h"
#include "xdr_fs.h"
@@ -105,23 +106,57 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
}
/*
- * Get a new directory folio.
+ * Get a specific block, extending the directory storage to cover it as needed.
*/
-static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
+static union afs_xdr_dir_block *afs_dir_get_block(struct afs_dir_iter *iter, size_t block)
{
- struct address_space *mapping = vnode->netfs.inode.i_mapping;
+ struct folio_queue *fq;
+ struct afs_vnode *dvnode = iter->dvnode;
struct folio *folio;
+ size_t blpos = block * AFS_DIR_BLOCK_SIZE;
+ size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
+ int ret;
+
+ if (dvnode->directory_size < blend) {
+ size_t cur_size = dvnode->directory_size;
+
+ ret = netfs_alloc_folioq_buffer(
+ NULL, &dvnode->directory, &cur_size, blend,
+ mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+ dvnode->directory_size = cur_size;
+ if (ret < 0)
+ goto fail;
+ }
- folio = __filemap_get_folio(mapping, index,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
- mapping->gfp_mask);
- if (IS_ERR(folio)) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- return NULL;
+ fq = iter->fq;
+ if (!fq)
+ fq = dvnode->directory;
+
+ /* Search the folio queue for the folio containing the block... */
+ for (; fq; fq = fq->next) {
+ for (int s = iter->fq_slot; s < folioq_count(fq); s++) {
+ size_t fsize = folioq_folio_size(fq, s);
+
+ if (blend <= fpos + fsize) {
+ /* ... and then return the mapped block. */
+ folio = folioq_folio(fq, s);
+ if (WARN_ON_ONCE(folio_pos(folio) != fpos))
+ goto fail;
+ iter->fq = fq;
+ iter->fq_slot = s;
+ iter->fpos = fpos;
+ return kmap_local_folio(folio, blpos - fpos);
+ }
+ fpos += fsize;
+ }
+ iter->fq_slot = 0;
}
- if (!folio_test_private(folio))
- folio_attach_private(folio, (void *)1);
- return folio;
+
+fail:
+ iter->fq = NULL;
+ iter->fq_slot = 0;
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
+ return NULL;
}
/*
@@ -209,9 +244,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
{
union afs_xdr_dir_block *meta, *block;
union afs_xdr_dirent *de;
- struct folio *folio0, *folio;
- unsigned int need_slots, nr_blocks, b;
- pgoff_t index;
+ struct afs_dir_iter iter = { .dvnode = vnode };
+ unsigned int nr_blocks, b, entry;
loff_t i_size;
int slot;
@@ -220,20 +254,17 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
i_size = i_size_read(&vnode->netfs.inode);
if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_bad_size);
return;
}
- folio0 = afs_dir_get_folio(vnode, 0);
- if (!folio0) {
- _leave(" [fgp]");
+ meta = afs_dir_get_block(&iter, 0);
+ if (!meta)
return;
- }
/* Work out how many slots we're going to need. */
- need_slots = afs_dir_calc_slots(name->len);
+ iter.nr_slots = afs_dir_calc_slots(name->len);
- meta = kmap_local_folio(folio0, 0);
if (i_size == 0)
goto new_directory;
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
@@ -245,22 +276,21 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
/* If the directory extended into a new folio, then we need to
* tack a new folio on the end.
*/
- index = b / AFS_DIR_BLOCKS_PER_PAGE;
if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
- goto error;
- if (index >= folio_nr_pages(folio0)) {
- folio = afs_dir_get_folio(vnode, index);
- if (!folio)
- goto error;
- } else {
- folio = folio0;
- }
+ goto error_too_many_blocks;
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
+ /* Lower dir blocks have a counter in the header we can check. */
+ if (b < AFS_DIR_BLOCKS_WITH_CTR &&
+ meta->meta.alloc_ctrs[b] < iter.nr_slots)
+ continue;
+
+ block = afs_dir_get_block(&iter, b);
+ if (!block)
+ goto error;
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- goto invalidated;
+ goto already_invalidated;
_debug("block %u: %2u %3u %u",
b,
@@ -275,31 +305,23 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
}
- /* Only lower dir blocks have a counter in the header. */
- if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
- meta->meta.alloc_ctrs[b] >= need_slots) {
- /* We need to try and find one or more consecutive
- * slots to hold the entry.
- */
- slot = afs_find_contig_bits(block, need_slots);
- if (slot >= 0) {
- _debug("slot %u", slot);
- goto found_space;
- }
+ /* We need to try and find one or more consecutive slots to
+ * hold the entry.
+ */
+ slot = afs_find_contig_bits(block, iter.nr_slots);
+ if (slot >= 0) {
+ _debug("slot %u", slot);
+ goto found_space;
}
kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
}
/* There are no spare slots of sufficient size, yet the operation
* succeeded. Download the directory again.
*/
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_no_slots);
goto out_unmap;
new_directory:
@@ -307,8 +329,7 @@ new_directory:
i_size = AFS_DIR_BLOCK_SIZE;
afs_set_i_size(vnode, i_size);
slot = AFS_DIR_RESV_BLOCKS0;
- folio = folio0;
- block = kmap_local_folio(folio, 0);
+ block = afs_dir_get_block(&iter, 0);
nr_blocks = 1;
b = 0;
@@ -326,41 +347,39 @@ found_space:
de->u.name[name->len] = 0;
/* Adjust the bitmap. */
- afs_set_contig_bits(block, slot, need_slots);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ afs_set_contig_bits(block, slot, iter.nr_slots);
/* Adjust the allocation counter. */
if (b < AFS_DIR_BLOCKS_WITH_CTR)
- meta->meta.alloc_ctrs[b] -= need_slots;
+ meta->meta.alloc_ctrs[b] -= iter.nr_slots;
+
+ /* Adjust the hash chain. */
+ entry = b * AFS_DIR_SLOTS_PER_BLOCK + slot;
+ iter.bucket = afs_dir_hash_name(name);
+ de->u.hash_next = meta->meta.hashtable[iter.bucket];
+ meta->meta.hashtable[iter.bucket] = htons(entry);
+ kunmap_local(block);
inode_inc_iversion_raw(&vnode->netfs.inode);
afs_stat_v(vnode, n_dir_cr);
_debug("Insert %s in %u[%u]", name->name, b, slot);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+
out_unmap:
kunmap_local(meta);
- folio_unlock(folio0);
- folio_put(folio0);
_leave("");
return;
-invalidated:
+already_invalidated:
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
goto out_unmap;
+error_too_many_blocks:
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_add_too_many_blocks);
error:
trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out_unmap;
}
@@ -374,13 +393,14 @@ error:
void afs_edit_dir_remove(struct afs_vnode *vnode,
struct qstr *name, enum afs_edit_dir_reason why)
{
- union afs_xdr_dir_block *meta, *block;
- union afs_xdr_dirent *de;
- struct folio *folio0, *folio;
- unsigned int need_slots, nr_blocks, b;
- pgoff_t index;
+ union afs_xdr_dir_block *meta, *block, *pblock;
+ union afs_xdr_dirent *de, *pde;
+ struct afs_dir_iter iter = { .dvnode = vnode };
+ struct afs_fid fid;
+ unsigned int b, slot, entry;
loff_t i_size;
- int slot;
+ __be16 next;
+ int found;
_enter(",,{%d,%s},", name->len, name->name);
@@ -388,81 +408,95 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
if (i_size < AFS_DIR_BLOCK_SIZE ||
i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_bad_size);
return;
}
- nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
- folio0 = afs_dir_get_folio(vnode, 0);
- if (!folio0) {
- _leave(" [fgp]");
+ if (!afs_dir_init_iter(&iter, name))
return;
- }
-
- /* Work out how many slots we're going to discard. */
- need_slots = afs_dir_calc_slots(name->len);
-
- meta = kmap_local_folio(folio0, 0);
-
- /* Find a block that has sufficient slots available. Each folio
- * contains two or more directory blocks.
- */
- for (b = 0; b < nr_blocks; b++) {
- index = b / AFS_DIR_BLOCKS_PER_PAGE;
- if (index >= folio_nr_pages(folio0)) {
- folio = afs_dir_get_folio(vnode, index);
- if (!folio)
- goto error;
- } else {
- folio = folio0;
- }
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
-
- /* Abandon the edit if we got a callback break. */
- if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- goto invalidated;
-
- if (b > AFS_DIR_BLOCKS_WITH_CTR ||
- meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
- slot = afs_dir_scan_block(block, name, b);
- if (slot >= 0)
- goto found_dirent;
- }
+ meta = afs_dir_find_block(&iter, 0);
+ if (!meta)
+ return;
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ /* Find the entry in the blob. */
+ found = afs_dir_search_bucket(&iter, name, &fid);
+ if (found < 0) {
+ /* Didn't find the dirent to clobber. Re-download. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
+ 0, 0, 0, 0, name->name);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_rem_wrong_name);
+ goto out_unmap;
}
- /* Didn't find the dirent to clobber. Download the directory again. */
- trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
- 0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- goto out_unmap;
+ entry = found;
+ b = entry / AFS_DIR_SLOTS_PER_BLOCK;
+ slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
-found_dirent:
+ block = afs_dir_find_block(&iter, b);
+ if (!block)
+ goto error;
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto already_invalidated;
+
+ /* Check and clear the entry. */
de = &block->dirents[slot];
+ if (de->u.valid != 1)
+ goto error_unmap;
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot,
ntohl(de->u.vnode), ntohl(de->u.unique),
name->name);
- memset(de, 0, sizeof(*de) * need_slots);
-
/* Adjust the bitmap. */
- afs_clear_contig_bits(block, slot, need_slots);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
+ afs_clear_contig_bits(block, slot, iter.nr_slots);
/* Adjust the allocation counter. */
if (b < AFS_DIR_BLOCKS_WITH_CTR)
- meta->meta.alloc_ctrs[b] += need_slots;
+ meta->meta.alloc_ctrs[b] += iter.nr_slots;
+
+ /* Clear the constituent entries. */
+ next = de->u.hash_next;
+ memset(de, 0, sizeof(*de) * iter.nr_slots);
+ kunmap_local(block);
+
+ /* Adjust the hash chain: if iter.prev_entry is 0, the link to update is
+ * the hashtable head; otherwise it's the hash_next of the previous entry.
+ */
+ if (!iter.prev_entry) {
+ __be16 prev_next = meta->meta.hashtable[iter.bucket];
+
+ if (unlikely(prev_next != htons(entry))) {
+ pr_warn("%llx:%llx:%x: not head of chain b=%x p=%x,%x e=%x %*s",
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+ iter.bucket, iter.prev_entry, prev_next, entry,
+ name->len, name->name);
+ goto error;
+ }
+ meta->meta.hashtable[iter.bucket] = next;
+ } else {
+ unsigned int pb = iter.prev_entry / AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int ps = iter.prev_entry % AFS_DIR_SLOTS_PER_BLOCK;
+ __be16 prev_next;
+
+ pblock = afs_dir_find_block(&iter, pb);
+ if (!pblock)
+ goto error;
+ pde = &pblock->dirents[ps];
+ prev_next = pde->u.hash_next;
+ if (prev_next != htons(entry)) {
+ kunmap_local(pblock);
+ pr_warn("%llx:%llx:%x: not prev in chain b=%x p=%x,%x e=%x %*s",
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
+ iter.bucket, iter.prev_entry, prev_next, entry,
+ name->len, name->name);
+ goto error;
+ }
+ pde->u.hash_next = next;
+ kunmap_local(pblock);
+ }
+
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
afs_stat_v(vnode, n_dir_rm);
@@ -470,26 +504,20 @@ found_dirent:
out_unmap:
kunmap_local(meta);
- folio_unlock(folio0);
- folio_put(folio0);
_leave("");
return;
-invalidated:
+already_invalidated:
+ kunmap_local(block);
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
- kunmap_local(block);
- if (folio != folio0) {
- folio_unlock(folio);
- folio_put(folio);
- }
goto out_unmap;
+error_unmap:
+ kunmap_local(block);
error:
trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error,
0, 0, 0, 0, name->name);
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out_unmap;
}
@@ -502,9 +530,8 @@ void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_d
{
union afs_xdr_dir_block *block;
union afs_xdr_dirent *de;
- struct folio *folio;
+ struct afs_dir_iter iter = { .dvnode = vnode };
unsigned int nr_blocks, b;
- pgoff_t index;
loff_t i_size;
int slot;
@@ -512,39 +539,35 @@ void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_d
i_size = i_size_read(&vnode->netfs.inode);
if (i_size < AFS_DIR_BLOCK_SIZE) {
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_bad_size);
return;
}
+
nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
/* Find a block that has sufficient slots available. Each folio
* contains two or more directory blocks.
*/
for (b = 0; b < nr_blocks; b++) {
- index = b / AFS_DIR_BLOCKS_PER_PAGE;
- folio = afs_dir_get_folio(vnode, index);
- if (!folio)
+ block = afs_dir_get_block(&iter, b);
+ if (!block)
goto error;
- block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
-
/* Abandon the edit if we got a callback break. */
if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- goto invalidated;
+ goto already_invalidated;
slot = afs_dir_scan_block(block, &dotdot_name, b);
if (slot >= 0)
goto found_dirent;
kunmap_local(block);
- folio_unlock(folio);
- folio_put(folio);
}
/* Didn't find the dirent to clobber. Download the directory again. */
trace_afs_edit_dir(vnode, why, afs_edit_dir_update_nodd,
0, 0, 0, 0, "..");
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ afs_invalidate_dir(vnode, afs_dir_invalid_edit_upd_no_dd);
goto out;
found_dirent:
@@ -556,26 +579,70 @@ found_dirent:
ntohl(de->u.vnode), ntohl(de->u.unique), "..");
kunmap_local(block);
- folio_unlock(folio);
- folio_put(folio);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
out:
_leave("");
return;
-invalidated:
+already_invalidated:
kunmap_local(block);
- folio_unlock(folio);
- folio_put(folio);
trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval,
0, 0, 0, 0, "..");
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out;
error:
trace_afs_edit_dir(vnode, why, afs_edit_dir_update_error,
0, 0, 0, 0, "..");
- clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out;
}
+
+/*
+ * Initialise a new directory. We need to fill in the "." and ".." entries.
+ */
+void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_dvnode)
+{
+ union afs_xdr_dir_block *meta;
+ struct afs_dir_iter iter = { .dvnode = dvnode };
+ union afs_xdr_dirent *de;
+ unsigned int slot = AFS_DIR_RESV_BLOCKS0;
+ loff_t i_size;
+
+ i_size = i_size_read(&dvnode->netfs.inode);
+ if (i_size != AFS_DIR_BLOCK_SIZE) {
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_add_bad_size);
+ return;
+ }
+
+ meta = afs_dir_get_block(&iter, 0);
+ if (!meta)
+ return;
+
+ afs_edit_init_block(meta, meta, 0);
+
+ de = &meta->dirents[slot];
+ de->u.valid = 1;
+ de->u.vnode = htonl(dvnode->fid.vnode);
+ de->u.unique = htonl(dvnode->fid.unique);
+ memcpy(de->u.name, ".", 2);
+ trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
+ dvnode->fid.vnode, dvnode->fid.unique, ".");
+ slot++;
+
+ de = &meta->dirents[slot];
+ de->u.valid = 1;
+ de->u.vnode = htonl(parent_dvnode->fid.vnode);
+ de->u.unique = htonl(parent_dvnode->fid.unique);
+ memcpy(de->u.name, "..", 3);
+ trace_afs_edit_dir(dvnode, afs_edit_dir_for_mkdir, afs_edit_dir_mkdir, 0, slot,
+ parent_dvnode->fid.vnode, parent_dvnode->fid.unique, "..");
+
+ afs_set_contig_bits(meta, AFS_DIR_RESV_BLOCKS0, 2);
+ meta->meta.alloc_ctrs[0] -= 2;
+ kunmap_local(meta);
+
+ netfs_single_mark_inode_dirty(&dvnode->netfs.inode);
+ set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+ set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+}
diff --git a/fs/afs/dir_search.c b/fs/afs/dir_search.c
new file mode 100644
index 000000000000..b25bd892db4d
--- /dev/null
+++ b/fs/afs/dir_search.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Search a directory's hash table.
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * https://tools.ietf.org/html/draft-keiser-afs3-directory-object-00
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+
+/*
+ * Calculate the name hash.
+ */
+unsigned int afs_dir_hash_name(const struct qstr *name)
+{
+ const unsigned char *p = name->name;
+ unsigned int hash = 0, i;
+ int bucket;
+
+ for (i = 0; i < name->len; i++)
+ hash = (hash * 173) + p[i];
+ bucket = hash & (AFS_DIR_HASHTBL_SIZE - 1);
+ if (hash > INT_MAX) {
+ bucket = AFS_DIR_HASHTBL_SIZE - bucket;
+ bucket &= (AFS_DIR_HASHTBL_SIZE - 1);
+ }
+ return bucket;
+}
+
+/*
+ * Reset a directory iterator.
+ */
+static bool afs_dir_reset_iter(struct afs_dir_iter *iter)
+{
+ unsigned long long i_size = i_size_read(&iter->dvnode->netfs.inode);
+ unsigned int nblocks;
+
+ /* Work out the maximum number of steps we can take. */
+ nblocks = umin(i_size / AFS_DIR_BLOCK_SIZE, AFS_DIR_MAX_BLOCKS);
+ if (!nblocks)
+ return false;
+ iter->loop_check = nblocks * (AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS);
+ iter->prev_entry = 0; /* Hash head is previous */
+ return true;
+}
+
+/*
+ * Initialise a directory iterator for looking up a name.
+ */
+bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name)
+{
+ iter->nr_slots = afs_dir_calc_slots(name->len);
+ iter->bucket = afs_dir_hash_name(name);
+ return afs_dir_reset_iter(iter);
+}
+
+/*
+ * Find and map a specific block; invalidate the directory if it isn't found.
+ */
+union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block)
+{
+ struct folio_queue *fq = iter->fq;
+ struct afs_vnode *dvnode = iter->dvnode;
+ struct folio *folio;
+ size_t blpos = block * AFS_DIR_BLOCK_SIZE;
+ size_t blend = (block + 1) * AFS_DIR_BLOCK_SIZE, fpos = iter->fpos;
+ int slot = iter->fq_slot;
+
+ _enter("%zx,%d", block, slot);
+
+ if (iter->block) {
+ kunmap_local(iter->block);
+ iter->block = NULL;
+ }
+
+ if (dvnode->directory_size < blend)
+ goto fail;
+
+ if (!fq || blpos < fpos) {
+ fq = dvnode->directory;
+ slot = 0;
+ fpos = 0;
+ }
+
+ /* Search the folio queue for the folio containing the block... */
+ for (; fq; fq = fq->next) {
+ for (; slot < folioq_count(fq); slot++) {
+ size_t fsize = folioq_folio_size(fq, slot);
+
+ if (blend <= fpos + fsize) {
+ /* ... and then return the mapped block. */
+ folio = folioq_folio(fq, slot);
+ if (WARN_ON_ONCE(folio_pos(folio) != fpos))
+ goto fail;
+ iter->fq = fq;
+ iter->fq_slot = slot;
+ iter->fpos = fpos;
+ iter->block = kmap_local_folio(folio, blpos - fpos);
+ return iter->block;
+ }
+ fpos += fsize;
+ }
+ slot = 0;
+ }
+
+fail:
+ iter->fq = NULL;
+ iter->fq_slot = 0;
+ afs_invalidate_dir(dvnode, afs_dir_invalid_edit_get_block);
+ return NULL;
+}
+
+/*
+ * Search through a directory bucket.
+ */
+int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
+ struct afs_fid *_fid)
+{
+ const union afs_xdr_dir_block *meta;
+ unsigned int entry;
+ int ret = -ESTALE;
+
+ meta = afs_dir_find_block(iter, 0);
+ if (!meta)
+ return -ESTALE;
+
+ entry = ntohs(meta->meta.hashtable[iter->bucket & (AFS_DIR_HASHTBL_SIZE - 1)]);
+ _enter("%x,%x", iter->bucket, entry);
+
+ while (entry) {
+ const union afs_xdr_dir_block *block;
+ const union afs_xdr_dirent *dire;
+ unsigned int blnum = entry / AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int slot = entry % AFS_DIR_SLOTS_PER_BLOCK;
+ unsigned int resv = (blnum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+
+ _debug("search %x", entry);
+
+ if (slot < resv) {
+ kdebug("slot out of range h=%x rs=%2x sl=%2x-%2x",
+ iter->bucket, resv, slot, slot + iter->nr_slots - 1);
+ goto bad;
+ }
+
+ block = afs_dir_find_block(iter, blnum);
+ if (!block)
+ goto bad;
+ dire = &block->dirents[slot];
+
+ if (slot + iter->nr_slots <= AFS_DIR_SLOTS_PER_BLOCK &&
+ memcmp(dire->u.name, name->name, name->len) == 0 &&
+ dire->u.name[name->len] == '\0') {
+ _fid->vnode = ntohl(dire->u.vnode);
+ _fid->unique = ntohl(dire->u.unique);
+ ret = entry;
+ goto found;
+ }
+
+ iter->prev_entry = entry;
+ entry = ntohs(dire->u.hash_next);
+ if (!--iter->loop_check) {
+ kdebug("dir chain loop h=%x", iter->bucket);
+ goto bad;
+ }
+ }
+
+ ret = -ENOENT;
+found:
+ if (iter->block) {
+ kunmap_local(iter->block);
+ iter->block = NULL;
+ }
+
+bad:
+ if (ret == -ESTALE)
+ afs_invalidate_dir(iter->dvnode, afs_dir_invalid_iter_stale);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Search the appropriate hash chain in the contents of an AFS directory.
+ */
+int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name,
+ struct afs_fid *_fid, afs_dataversion_t *_dir_version)
+{
+ struct afs_dir_iter iter = { .dvnode = dvnode, };
+ int ret, retry_limit = 3;
+
+ _enter("{%lu},,,", dvnode->netfs.inode.i_ino);
+
+ if (!afs_dir_init_iter(&iter, name))
+ return -ENOENT;
+ do {
+ if (--retry_limit < 0) {
+ pr_warn("afs_read_dir(): Too many retries\n");
+ ret = -ESTALE;
+ break;
+ }
+ ret = afs_read_dir(dvnode, NULL);
+ if (ret < 0) {
+ if (ret != -ESTALE)
+ break;
+ if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) {
+ ret = -ESTALE;
+ break;
+ }
+ continue;
+ }
+ *_dir_version = inode_peek_iversion_raw(&dvnode->netfs.inode);
+
+ ret = afs_dir_search_bucket(&iter, name, _fid);
+ up_read(&dvnode->validate_lock);
+ if (ret == -ESTALE)
+ afs_dir_reset_iter(&iter);
+ } while (ret == -ESTALE);
+
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 6762eff97517..fc15497608c6 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -20,7 +20,6 @@
#include "internal.h"
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
-static int afs_symlink_read_folio(struct file *file, struct folio *folio);
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
@@ -61,13 +60,6 @@ const struct address_space_operations afs_file_aops = {
.writepages = afs_writepages,
};
-const struct address_space_operations afs_symlink_aops = {
- .read_folio = afs_symlink_read_folio,
- .release_folio = netfs_release_folio,
- .invalidate_folio = netfs_invalidate_folio,
- .migrate_folio = filemap_migrate_folio,
-};
-
static const struct vm_operations_struct afs_vm_ops = {
.open = afs_vm_open,
.close = afs_vm_close,
@@ -208,49 +200,12 @@ int afs_release(struct inode *inode, struct file *file)
return ret;
}
-/*
- * Allocate a new read record.
- */
-struct afs_read *afs_alloc_read(gfp_t gfp)
-{
- struct afs_read *req;
-
- req = kzalloc(sizeof(struct afs_read), gfp);
- if (req)
- refcount_set(&req->usage, 1);
-
- return req;
-}
-
-/*
- * Dispose of a ref to a read record.
- */
-void afs_put_read(struct afs_read *req)
-{
- if (refcount_dec_and_test(&req->usage)) {
- if (req->cleanup)
- req->cleanup(req);
- key_put(req->key);
- kfree(req);
- }
-}
-
static void afs_fetch_data_notify(struct afs_operation *op)
{
- struct afs_read *req = op->fetch.req;
- struct netfs_io_subrequest *subreq = req->subreq;
- int error = afs_op_error(op);
-
- req->error = error;
- if (subreq) {
- subreq->rreq->i_size = req->file_size;
- if (req->pos + req->actual_len >= req->file_size)
- __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
- netfs_read_subreq_terminated(subreq, error, false);
- req->subreq = NULL;
- } else if (req->done) {
- req->done(req);
- }
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
+
+ subreq->error = afs_op_error(op);
+ netfs_read_subreq_terminated(subreq);
}
static void afs_fetch_data_success(struct afs_operation *op)
@@ -260,7 +215,7 @@ static void afs_fetch_data_success(struct afs_operation *op)
_enter("op=%08x", op->debug_id);
afs_vnode_commit_status(op, &op->file[0]);
afs_stat_v(vnode, n_fetches);
- atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes);
+ atomic_long_add(op->fetch.subreq->transferred, &op->net->n_fetch_bytes);
afs_fetch_data_notify(op);
}
@@ -270,107 +225,188 @@ static void afs_fetch_data_aborted(struct afs_operation *op)
afs_fetch_data_notify(op);
}
-static void afs_fetch_data_put(struct afs_operation *op)
-{
- op->fetch.req->error = afs_op_error(op);
- afs_put_read(op->fetch.req);
-}
-
-static const struct afs_operation_ops afs_fetch_data_operation = {
+const struct afs_operation_ops afs_fetch_data_operation = {
.issue_afs_rpc = afs_fs_fetch_data,
.issue_yfs_rpc = yfs_fs_fetch_data,
.success = afs_fetch_data_success,
.aborted = afs_fetch_data_aborted,
.failed = afs_fetch_data_notify,
- .put = afs_fetch_data_put,
};
+/*
+ * Issue (or reissue) the FetchData call for a read operation to the server
+ * currently set in op->server, first clearing the per-call result fields.
+ */
+static void afs_issue_read_call(struct afs_operation *op)
+{
+ op->call_responded = false;
+ op->call_error = 0;
+ op->call_abort_code = 0;
+ /* Pick the YFS or AFS variant of the RPC from the server's flags. */
+ if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags))
+ yfs_fs_fetch_data(op);
+ else
+ afs_fs_fetch_data(op);
+}
+
+/*
+ * Wind up a read operation: note on the server record that it responded (if
+ * it did), invoke the success/aborted/failure handler that matches the
+ * operation's final state, then end the vnode operation and put the
+ * operation.
+ */
+static void afs_end_read(struct afs_operation *op)
+{
+ if (op->call_responded && op->server)
+ set_bit(AFS_SERVER_FL_RESPONDING, &op->server->flags);
+
+ /* Each of these handlers ends up in afs_fetch_data_notify(). */
+ if (!afs_op_error(op))
+ afs_fetch_data_success(op);
+ else if (op->cumul_error.aborted)
+ afs_fetch_data_aborted(op);
+ else
+ afs_fetch_data_notify(op);
+
+ afs_end_vnode_operation(op);
+ afs_put_operation(op);
+}
+
+/*
+ * Perform I/O processing on an asynchronous call. The work item carries a ref
+ * to the call struct that we either need to release or to pass on.
+ *
+ * Data is delivered to the call for as long as it flags that it needs
+ * attention. If the call is still in progress afterwards and rxrpc reports
+ * it alive, we just report progress and wait to be called again. Otherwise
+ * the call's result is copied to the operation, the call is detached and
+ * either the next server is tried or the read is wound up.
+ */
+static void afs_read_receive(struct afs_call *call)
+{
+ struct afs_operation *op = call->op;
+ enum afs_call_state state;
+
+ _enter("");
+
+ state = READ_ONCE(call->state);
+ if (state == AFS_CALL_COMPLETE)
+ return;
+ trace_afs_read_recv(op, call);
+
+ /* Clear the flag before delivering so that a request for attention
+ * raised during delivery causes another pass round the loop.
+ */
+ while (state < AFS_CALL_COMPLETE && READ_ONCE(call->need_attention)) {
+ WRITE_ONCE(call->need_attention, false);
+ afs_deliver_to_call(call);
+ state = READ_ONCE(call->state);
+ }
+
+ if (state < AFS_CALL_COMPLETE) {
+ netfs_read_subreq_progress(op->fetch.subreq);
+ if (rxrpc_kernel_check_life(call->net->socket, call->rxcall))
+ return;
+ /* rxrpc terminated the call. */
+ afs_set_call_complete(call, call->error, call->abort_code);
+ }
+
+ /* Transfer the outcome to the operation and unhook the call from it. */
+ op->call_abort_code = call->abort_code;
+ op->call_error = call->error;
+ op->call_responded = call->responded;
+ op->call = NULL;
+ call->op = NULL;
+ afs_put_call(call);
+
+ /* If the call failed, then we need to crank the server rotation
+ * handle and try the next.
+ */
+ if (afs_select_fileserver(op)) {
+ afs_issue_read_call(op);
+ return;
+ }
+
+ afs_end_read(op);
+}
+
+/*
+ * Work item handler for asynchronous FetchData reception (installed as the
+ * ->async_rx method of the FetchData call types). Processes the call that
+ * embeds the work item and then puts a ref on it - presumably the ref that
+ * was taken when the work was queued.
+ */
+void afs_fetch_data_async_rx(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
+ afs_read_receive(call);
+ afs_put_call(call);
+}
+
+/*
+ * Handle a FetchData call being cancelled immediately (the
+ * ->immediate_cancel method of the FetchData call types). For an async call,
+ * the async work item is queued with a ref on the call and then waited for,
+ * so the normal receive path does the completion processing. Nothing is
+ * done for a non-async call.
+ */
+void afs_fetch_data_immediate_cancel(struct afs_call *call)
+{
+ if (call->async) {
+ afs_get_call(call, afs_call_trace_wake);
+ /* If the work was already queued, give back the ref we just took. */
+ if (!queue_work(afs_async_calls, &call->async_work))
+ afs_deferred_put_call(call);
+ flush_work(&call->async_work);
+ }
+}
+
/*
* Fetch file data from the volume.
*/
-int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
+static void afs_issue_read(struct netfs_io_subrequest *subreq)
{
struct afs_operation *op;
+ struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
+ struct key *key = subreq->rreq->netfs_priv;
_enter("%s{%llx:%llu.%u},%x,,,",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
- key_serial(req->key));
+ key_serial(key));
- op = afs_alloc_operation(req->key, vnode->volume);
+ op = afs_alloc_operation(key, vnode->volume);
if (IS_ERR(op)) {
- if (req->subreq)
- netfs_read_subreq_terminated(req->subreq, PTR_ERR(op), false);
- return PTR_ERR(op);
+ subreq->error = PTR_ERR(op);
+ netfs_read_subreq_terminated(subreq);
+ return;
}
afs_op_set_vnode(op, 0, vnode);
- op->fetch.req = afs_get_read(req);
+ op->fetch.subreq = subreq;
op->ops = &afs_fetch_data_operation;
- return afs_do_sync_operation(op);
-}
-
-static void afs_read_worker(struct work_struct *work)
-{
- struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work);
- struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
- struct afs_read *fsreq;
-
- fsreq = afs_alloc_read(GFP_NOFS);
- if (!fsreq)
- return netfs_read_subreq_terminated(subreq, -ENOMEM, false);
-
- fsreq->subreq = subreq;
- fsreq->pos = subreq->start + subreq->transferred;
- fsreq->len = subreq->len - subreq->transferred;
- fsreq->key = key_get(subreq->rreq->netfs_priv);
- fsreq->vnode = vnode;
- fsreq->iter = &subreq->io_iter;
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
- afs_fetch_data(fsreq->vnode, fsreq);
- afs_put_read(fsreq);
-}
-
-static void afs_issue_read(struct netfs_io_subrequest *subreq)
-{
- INIT_WORK(&subreq->work, afs_read_worker);
- queue_work(system_long_wq, &subreq->work);
-}
-static int afs_symlink_read_folio(struct file *file, struct folio *folio)
-{
- struct afs_vnode *vnode = AFS_FS_I(folio->mapping->host);
- struct afs_read *fsreq;
- int ret;
+ if (subreq->rreq->origin == NETFS_READAHEAD ||
+ subreq->rreq->iocb) {
+ op->flags |= AFS_OPERATION_ASYNC;
- fsreq = afs_alloc_read(GFP_NOFS);
- if (!fsreq)
- return -ENOMEM;
+ if (!afs_begin_vnode_operation(op)) {
+ subreq->error = afs_put_operation(op);
+ netfs_read_subreq_terminated(subreq);
+ return;
+ }
- fsreq->pos = folio_pos(folio);
- fsreq->len = folio_size(folio);
- fsreq->vnode = vnode;
- fsreq->iter = &fsreq->def_iter;
- iov_iter_xarray(&fsreq->def_iter, ITER_DEST, &folio->mapping->i_pages,
- fsreq->pos, fsreq->len);
+ if (!afs_select_fileserver(op)) {
+ afs_end_read(op);
+ return;
+ }
- ret = afs_fetch_data(fsreq->vnode, fsreq);
- if (ret == 0)
- folio_mark_uptodate(folio);
- folio_unlock(folio);
- return ret;
+ afs_issue_read_call(op);
+ } else {
+ afs_do_sync_operation(op);
+ }
}
static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
{
+ struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
+
if (file)
rreq->netfs_priv = key_get(afs_file_key(file));
rreq->rsize = 256 * 1024;
rreq->wsize = 256 * 1024 * 1024;
+
+ switch (rreq->origin) {
+ case NETFS_READ_SINGLE:
+ if (!file) {
+ struct key *key = afs_request_key(vnode->volume->cell);
+
+ if (IS_ERR(key))
+ return PTR_ERR(key);
+ rreq->netfs_priv = key;
+ }
+ break;
+ case NETFS_WRITEBACK:
+ case NETFS_WRITETHROUGH:
+ case NETFS_UNBUFFERED_WRITE:
+ case NETFS_DIO_WRITE:
+ if (S_ISREG(rreq->inode->i_mode))
+ rreq->io_streams[0].avail = true;
+ break;
+ case NETFS_WRITEBACK_SINGLE:
+ default:
+ break;
+ }
return 0;
}
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 428721bbe4f6..8418813ee043 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -49,6 +49,105 @@ struct afs_operation *afs_alloc_operation(struct key *key, struct afs_volume *vo
return op;
}
+/*
+ * Record of a task waiting for a vnode's I/O lock. Instances live on the
+ * waiting task's stack and are queued on vnode->io_lock_waiters.
+ */
+struct afs_io_locker {
+ struct list_head link; /* Link in vnode->io_lock_waiters */
+ struct task_struct *task; /* The waiting task, to be woken on handover */
+ unsigned long have_lock; /* Set to 1 by the unlocker to pass the lock on */
+};
+
+/*
+ * Unlock the I/O lock on a vnode.
+ *
+ * If anyone is waiting, the lock is handed directly to the first waiter: it
+ * is dequeued, its have_lock flag is set and it is woken, with
+ * AFS_VNODE_IO_LOCK left set throughout. The lock bit is only cleared when
+ * there are no waiters.
+ */
+static void afs_unlock_for_io(struct afs_vnode *vnode)
+{
+ struct afs_io_locker *locker;
+
+ spin_lock(&vnode->lock);
+ locker = list_first_entry_or_null(&vnode->io_lock_waiters,
+ struct afs_io_locker, link);
+ if (locker) {
+ list_del(&locker->link);
+ smp_store_release(&locker->have_lock, 1); /* The unlock barrier. */
+ smp_mb__after_atomic(); /* Store have_lock before task state */
+ wake_up_process(locker->task);
+ } else {
+ clear_bit(AFS_VNODE_IO_LOCK, &vnode->flags);
+ }
+ spin_unlock(&vnode->lock);
+}
+
+/*
+ * Lock the I/O lock on a vnode uninterruptibly. We can't use an ordinary
+ * mutex as lockdep will complain if we unlock it in the wrong thread.
+ *
+ * If the lock bit is clear it is taken immediately. Otherwise an on-stack
+ * waiter record is queued and we sleep until the unlocker sets have_lock in
+ * it, at which point the lock has been handed to us.
+ */
+static void afs_lock_for_io(struct afs_vnode *vnode)
+{
+ struct afs_io_locker myself = { .task = current, };
+
+ spin_lock(&vnode->lock);
+
+ if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+ spin_unlock(&vnode->lock);
+ return;
+ }
+
+ list_add_tail(&myself.link, &vnode->io_lock_waiters);
+ spin_unlock(&vnode->lock);
+
+ /* Set the task state before testing the flag so that a handover
+ * between the test and schedule() still wakes us.
+ */
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (smp_load_acquire(&myself.have_lock)) /* The lock barrier */
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Lock the I/O lock on a vnode interruptibly. We can't use an ordinary mutex
+ * as lockdep will complain if we unlock it in the wrong thread.
+ *
+ * Returns 0 with the lock held, or -ERESTARTSYS without it if a signal is
+ * pending once the wait ends. Note that the signal check is made even if the
+ * lock was handed to us, in which case the lock is passed straight on.
+ */
+static int afs_lock_for_io_interruptible(struct afs_vnode *vnode)
+{
+ struct afs_io_locker myself = { .task = current, };
+ int ret = 0;
+
+ spin_lock(&vnode->lock);
+
+ if (!test_and_set_bit(AFS_VNODE_IO_LOCK, &vnode->flags)) {
+ spin_unlock(&vnode->lock);
+ return 0;
+ }
+
+ list_add_tail(&myself.link, &vnode->io_lock_waiters);
+ spin_unlock(&vnode->lock);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (smp_load_acquire(&myself.have_lock) || /* The lock barrier */
+ signal_pending(current))
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ /* If we got a signal, try to transfer the lock onto the next
+ * waiter.
+ */
+ if (unlikely(signal_pending(current))) {
+ /* have_lock is only set by the unlocker under vnode->lock, so
+ * testing it under the lock tells us reliably whether we were
+ * already dequeued and given the lock.
+ */
+ spin_lock(&vnode->lock);
+ if (myself.have_lock) {
+ spin_unlock(&vnode->lock);
+ afs_unlock_for_io(vnode);
+ } else {
+ list_del(&myself.link);
+ spin_unlock(&vnode->lock);
+ }
+ ret = -ERESTARTSYS;
+ }
+ return ret;
+}
+
/*
* Lock the vnode(s) being operated upon.
*/
@@ -60,7 +159,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
_enter("");
if (op->flags & AFS_OPERATION_UNINTR) {
- mutex_lock(&vnode->io_lock);
+ afs_lock_for_io(vnode);
op->flags |= AFS_OPERATION_LOCK_0;
_leave(" = t [1]");
return true;
@@ -72,7 +171,7 @@ static bool afs_get_io_locks(struct afs_operation *op)
if (vnode2 > vnode)
swap(vnode, vnode2);
- if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
+ if (afs_lock_for_io_interruptible(vnode) < 0) {
afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
_leave(" = f [I 0]");
@@ -81,10 +180,10 @@ static bool afs_get_io_locks(struct afs_operation *op)
op->flags |= AFS_OPERATION_LOCK_0;
if (vnode2) {
- if (mutex_lock_interruptible_nested(&vnode2->io_lock, 1) < 0) {
+ if (afs_lock_for_io_interruptible(vnode2) < 0) {
afs_op_set_error(op, -ERESTARTSYS);
op->flags |= AFS_OPERATION_STOP;
- mutex_unlock(&vnode->io_lock);
+ afs_unlock_for_io(vnode);
op->flags &= ~AFS_OPERATION_LOCK_0;
_leave(" = f [I 1]");
return false;
@@ -104,9 +203,9 @@ static void afs_drop_io_locks(struct afs_operation *op)
_enter("");
if (op->flags & AFS_OPERATION_LOCK_1)
- mutex_unlock(&vnode2->io_lock);
+ afs_unlock_for_io(vnode2);
if (op->flags & AFS_OPERATION_LOCK_0)
- mutex_unlock(&vnode->io_lock);
+ afs_unlock_for_io(vnode);
}
static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *vp,
@@ -157,7 +256,7 @@ bool afs_begin_vnode_operation(struct afs_operation *op)
/*
* Tidy up a filesystem cursor and unlock the vnode.
*/
-static void afs_end_vnode_operation(struct afs_operation *op)
+void afs_end_vnode_operation(struct afs_operation *op)
{
_enter("");
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 098fa034a1cc..1d9ecd5418d8 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -301,19 +301,19 @@ void afs_fs_fetch_status(struct afs_operation *op)
static int afs_deliver_fs_fetch_data(struct afs_call *call)
{
struct afs_operation *op = call->op;
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
const __be32 *bp;
size_t count_before;
int ret;
_enter("{%u,%zu,%zu/%llu}",
call->unmarshall, call->iov_len, iov_iter_count(call->iter),
- req->actual_len);
+ call->remaining);
switch (call->unmarshall) {
case 0:
- req->actual_len = 0;
+ call->remaining = 0;
call->unmarshall++;
if (call->operation_ID == FSFETCHDATA64) {
afs_extract_to_tmp64(call);
@@ -323,8 +323,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
}
fallthrough;
- /* Extract the returned data length into
- * ->actual_len. This may indicate more or less data than was
+ /* Extract the returned data length into ->remaining.
+ * This may indicate more or less data than was
* requested will be returned.
*/
case 1:
@@ -333,42 +333,40 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
if (ret < 0)
return ret;
- req->actual_len = be64_to_cpu(call->tmp64);
- _debug("DATA length: %llu", req->actual_len);
+ call->remaining = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", call->remaining);
- if (req->actual_len == 0)
+ if (call->remaining == 0)
goto no_more_data;
- call->iter = req->iter;
- call->iov_len = min(req->actual_len, req->len);
+ call->iter = &subreq->io_iter;
+ call->iov_len = umin(call->remaining, subreq->len - subreq->transferred);
call->unmarshall++;
fallthrough;
/* extract the returned data */
case 2:
count_before = call->iov_len;
- _debug("extract data %zu/%llu", count_before, req->actual_len);
+ _debug("extract data %zu/%llu", count_before, call->remaining);
ret = afs_extract_data(call, true);
- if (req->subreq) {
- req->subreq->transferred += count_before - call->iov_len;
- netfs_read_subreq_progress(req->subreq, false);
- }
+ subreq->transferred += count_before - call->iov_len;
+ call->remaining -= count_before - call->iov_len;
if (ret < 0)
return ret;
call->iter = &call->def_iter;
- if (req->actual_len <= req->len)
+ /* ->remaining is now only the surplus the server sent beyond what
+ * we had room for; if there is none, skip the discard step.
+ */
+ if (call->remaining == 0)
goto no_more_data;
/* Discard any excess data the server gave us */
- afs_extract_discard(call, req->actual_len - req->len);
+ afs_extract_discard(call, call->remaining);
call->unmarshall = 3;
fallthrough;
case 3:
_debug("extract discard %zu/%llu",
- iov_iter_count(call->iter), req->actual_len - req->len);
+ iov_iter_count(call->iter), call->remaining);
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -390,8 +388,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
xdr_decode_AFSCallBack(&bp, call, &vp->scb);
xdr_decode_AFSVolSync(&bp, &op->volsync);
- req->data_version = vp->scb.status.data_version;
- req->file_size = vp->scb.status.size;
+ if (subreq->start + subreq->transferred >= vp->scb.status.size)
+ __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
call->unmarshall++;
fallthrough;
@@ -410,14 +408,18 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
static const struct afs_call_type afs_RXFSFetchData = {
.name = "FS.FetchData",
.op = afs_FS_FetchData,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = afs_deliver_fs_fetch_data,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
static const struct afs_call_type afs_RXFSFetchData64 = {
.name = "FS.FetchData64",
.op = afs_FS_FetchData64,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = afs_deliver_fs_fetch_data,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
@@ -426,8 +428,8 @@ static const struct afs_call_type afs_RXFSFetchData64 = {
*/
static void afs_fs_fetch_data64(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
struct afs_call *call;
__be32 *bp;
@@ -437,16 +439,19 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
+ if (op->flags & AFS_OPERATION_ASYNC)
+ call->async = true;
+
/* marshall the parameters */
bp = call->request;
bp[0] = htonl(FSFETCHDATA64);
bp[1] = htonl(vp->fid.vid);
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
- bp[4] = htonl(upper_32_bits(req->pos));
- bp[5] = htonl(lower_32_bits(req->pos));
+ bp[4] = htonl(upper_32_bits(subreq->start + subreq->transferred));
+ bp[5] = htonl(lower_32_bits(subreq->start + subreq->transferred));
bp[6] = 0;
- bp[7] = htonl(lower_32_bits(req->len));
+ bp[7] = htonl(lower_32_bits(subreq->len - subreq->transferred));
call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
@@ -458,9 +463,9 @@ static void afs_fs_fetch_data64(struct afs_operation *op)
*/
void afs_fs_fetch_data(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
struct afs_call *call;
- struct afs_read *req = op->fetch.req;
__be32 *bp;
if (test_bit(AFS_SERVER_FL_HAS_FS64, &op->server->flags))
@@ -472,16 +477,14 @@ void afs_fs_fetch_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
- req->call_debug_id = call->debug_id;
-
/* marshall the parameters */
bp = call->request;
bp[0] = htonl(FSFETCHDATA);
bp[1] = htonl(vp->fid.vid);
bp[2] = htonl(vp->fid.vnode);
bp[3] = htonl(vp->fid.unique);
- bp[4] = htonl(lower_32_bits(req->pos));
- bp[5] = htonl(lower_32_bits(req->len));
+ /* Ask only for what is still outstanding: resume at the offset already
+ * reached and request the remainder of the subrequest, as the 64-bit
+ * variant does.
+ */
+ bp[4] = htonl(lower_32_bits(subreq->start + subreq->transferred));
+ bp[5] = htonl(lower_32_bits(subreq->len - subreq->transferred));
call->fid = vp->fid;
trace_afs_make_fs_call(call, &vp->fid);
@@ -1733,6 +1736,7 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
.op = afs_FS_GetCapabilities,
.deliver = afs_deliver_fs_get_capabilities,
.done = afs_fileserver_probe_result,
+ .immediate_cancel = afs_fileserver_probe_result,
.destructor = afs_fs_get_capabilities_destructor,
};
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index a95e77670b49..e9538e91f848 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -25,8 +25,94 @@
#include "internal.h"
#include "afs_fs.h"
+/*
+ * Preload the content of a symlink we have just created into the vnode's
+ * ->directory buffer so that it need not be fetched from the server. The
+ * target string, including its NUL terminator, is copied into the first
+ * folio of a freshly allocated buffer, the content is flagged as read and the
+ * inode is marked dirty. If the buffer cannot be allocated, nothing further
+ * is set up and no error is reported.
+ */
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
+{
+ size_t size = strlen(op->create.symlink) + 1;
+ size_t dsize = 0;
+ char *p;
+
+ if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
+ mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
+ return;
+
+ vnode->directory_size = dsize;
+ p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
+ memcpy(p, op->create.symlink, size);
+ kunmap_local(p);
+ set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+ netfs_single_mark_inode_dirty(&vnode->netfs.inode);
+}
+
+/*
+ * Delayed-call cleanup for afs_get_link(): @arg is the mapped address of the
+ * symlink content that was handed out. Unmap it and drop the folio ref that
+ * afs_get_link() took.
+ */
+static void afs_put_link(void *arg)
+{
+ /* Look up the folio from the address before the mapping is dropped. */
+ struct folio *folio = virt_to_folio(arg);
+
+ kunmap_local(arg);
+ folio_put(folio);
+}
+
+/*
+ * Get the content of a symlink (the ->get_link inode operation).
+ *
+ * The content is held in the first folio of vnode->directory. On success a
+ * ref is taken on that folio, it is mapped and the mapped address is
+ * returned; @callback is set to afs_put_link() to undo both. On failure an
+ * ERR_PTR is returned: -ECHILD in RCU pathwalk mode (@dentry is NULL) if the
+ * content has not been read or is not currently valid, otherwise the error
+ * from afs_validate() or afs_read_single().
+ */
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback)
+{
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ struct folio *folio;
+ char *content;
+ ssize_t ret;
+
+ if (!dentry) {
+ /* RCU pathwalk. */
+ if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
+ return ERR_PTR(-ECHILD);
+ goto good;
+ }
+
+ /* NOTE(review): this goes to fetch when the content has already been
+ * read and falls through to validation when it has not - confirm that
+ * this is the intended sense of the test.
+ */
+ if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+ goto fetch;
+
+ ret = afs_validate(vnode, NULL);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ /* Use what we hold only if it wasn't marked for zapping and has been read. */
+ if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+ test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
+ goto good;
+
+fetch:
+ ret = afs_read_single(vnode, NULL);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
+
+good:
+ folio = folioq_folio(vnode->directory, 0);
+ folio_get(folio);
+ content = kmap_local_folio(folio, 0);
+ set_delayed_call(callback, afs_put_link, content);
+ return content;
+}
+
+/*
+ * Copy the content of a symlink to userspace (the ->readlink inode
+ * operation). At most @buflen bytes are copied and no NUL terminator is
+ * appended. Returns the number of bytes copied, the error from
+ * afs_get_link(), or -EFAULT if the copy to userspace failed.
+ */
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+ DEFINE_DELAYED_CALL(done);
+ const char *content;
+ int len;
+
+ content = afs_get_link(dentry, d_inode(dentry), &done);
+ if (IS_ERR(content)) {
+ do_delayed_call(&done);
+ return PTR_ERR(content);
+ }
+
+ len = umin(strlen(content), buflen);
+ if (copy_to_user(buffer, content, len))
+ len = -EFAULT;
+ /* Release the mapping and folio ref obtained by afs_get_link(). */
+ do_delayed_call(&done);
+ return len;
+}
+
static const struct inode_operations afs_symlink_inode_operations = {
- .get_link = page_get_link,
+ .get_link = afs_get_link,
+ .readlink = afs_readlink,
};
static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
@@ -110,7 +196,9 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_op = &afs_dir_inode_operations;
inode->i_fop = &afs_dir_file_operations;
inode->i_mapping->a_ops = &afs_dir_aops;
- mapping_set_large_folios(inode->i_mapping);
+ __set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags);
+ /* Assume locally cached directory data will be valid. */
+ __set_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
break;
case AFS_FTYPE_SYMLINK:
/* Symlinks with a mode of 0644 are actually mountpoints. */
@@ -122,13 +210,13 @@ static int afs_inode_init_from_status(struct afs_operation *op,
inode->i_mode = S_IFDIR | 0555;
inode->i_op = &afs_mntpt_inode_operations;
inode->i_fop = &afs_mntpt_file_operations;
- inode->i_mapping->a_ops = &afs_symlink_aops;
} else {
inode->i_mode = S_IFLNK | status->mode;
inode->i_op = &afs_symlink_inode_operations;
- inode->i_mapping->a_ops = &afs_symlink_aops;
}
+ inode->i_mapping->a_ops = &afs_dir_aops;
inode_nohighmem(inode);
+ mapping_set_release_always(inode->i_mapping);
break;
default:
dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL);
@@ -140,15 +228,17 @@ static int afs_inode_init_from_status(struct afs_operation *op,
afs_set_netfs_context(vnode);
vnode->invalid_before = status->data_version;
+ trace_afs_set_dv(vnode, status->data_version);
inode_set_iversion_raw(&vnode->netfs.inode, status->data_version);
if (!vp->scb.have_cb) {
/* it's a symlink we just created (the fileserver
* didn't give us a callback) */
- atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
+ afs_clear_cb_promise(vnode, afs_cb_promise_set_new_symlink);
} else {
vnode->cb_server = op->server;
- atomic64_set(&vnode->cb_expires_at, vp->scb.callback.expires_at);
+ afs_set_cb_promise(vnode, vp->scb.callback.expires_at,
+ afs_cb_promise_set_new_inode);
}
write_sequnlock(&vnode->cb_lock);
@@ -207,12 +297,17 @@ static void afs_apply_status(struct afs_operation *op,
if (vp->update_ctime)
inode_set_ctime_to_ts(inode, op->ctime);
- if (vnode->status.data_version != status->data_version)
+ if (vnode->status.data_version != status->data_version) {
+ trace_afs_set_dv(vnode, status->data_version);
data_changed = true;
+ }
vnode->status = *status;
if (vp->dv_before + vp->dv_delta != status->data_version) {
+ trace_afs_dv_mismatch(vnode, vp->dv_before, vp->dv_delta,
+ status->data_version);
+
if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) &&
atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE)
pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
@@ -223,12 +318,10 @@ static void afs_apply_status(struct afs_operation *op,
op->debug_id);
vnode->invalid_before = status->data_version;
- if (vnode->status.type == AFS_FTYPE_DIR) {
- if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
- afs_stat_v(vnode, n_inval);
- } else {
+ if (vnode->status.type == AFS_FTYPE_DIR)
+ afs_invalidate_dir(vnode, afs_dir_invalid_dv_mismatch);
+ else
set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
- }
change_size = true;
data_changed = true;
unexpected_jump = true;
@@ -258,6 +351,8 @@ static void afs_apply_status(struct afs_operation *op,
inode_set_ctime_to_ts(inode, t);
inode_set_atime_to_ts(inode, t);
}
+ if (op->ops == &afs_fetch_data_operation)
+ op->fetch.subreq->rreq->i_size = status->size;
}
}
@@ -273,7 +368,7 @@ static void afs_apply_callback(struct afs_operation *op,
if (!afs_cb_is_broken(vp->cb_break_before, vnode)) {
if (op->volume->type == AFSVL_RWVOL)
vnode->cb_server = op->server;
- atomic64_set(&vnode->cb_expires_at, cb->expires_at);
+ afs_set_cb_promise(vnode, cb->expires_at, afs_cb_promise_set_apply_cb);
}
}
@@ -435,7 +530,9 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
} __packed key;
struct afs_vnode_cache_aux aux;
- if (vnode->status.type != AFS_FTYPE_FILE) {
+ if (vnode->status.type != AFS_FTYPE_FILE &&
+ vnode->status.type != AFS_FTYPE_DIR &&
+ vnode->status.type != AFS_FTYPE_SYMLINK) {
vnode->netfs.cache = NULL;
return;
}
@@ -637,6 +734,7 @@ int afs_drop_inode(struct inode *inode)
void afs_evict_inode(struct inode *inode)
{
struct afs_vnode_cache_aux aux;
+ struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
struct afs_vnode *vnode = AFS_FS_I(inode);
_enter("{%llx:%llu.%d}",
@@ -648,8 +746,22 @@ void afs_evict_inode(struct inode *inode)
ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
+ if ((S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) &&
+ (inode->i_state & I_DIRTY) &&
+ !sbi->dyn_root) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .for_sync = true,
+ .range_end = LLONG_MAX,
+ };
+
+ afs_single_writepages(inode->i_mapping, &wbc);
+ }
+
netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
+ netfs_free_folioq_buffer(vnode->directory);
afs_set_cache_aux(vnode, &aux);
netfs_clear_inode_writeback(inode, &aux);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index c9d620175e80..90f407774a9a 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -163,6 +163,7 @@ struct afs_call {
spinlock_t state_lock;
int error; /* error code */
u32 abort_code; /* Remote abort ID or 0 */
+ unsigned long long remaining; /* How much is left to receive */
unsigned int max_lifespan; /* Maximum lifespan in secs to set if not 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
@@ -201,11 +202,17 @@ struct afs_call_type {
/* clean up a call */
void (*destructor)(struct afs_call *call);
+ /* Async receive processing function */
+ void (*async_rx)(struct work_struct *work);
+
/* Work function */
void (*work)(struct work_struct *work);
/* Call done function (gets called immediately on success or failure) */
void (*done)(struct afs_call *call);
+
+ /* Handle a call being immediately cancelled. */
+ void (*immediate_cancel)(struct afs_call *call);
};
/*
@@ -233,28 +240,6 @@ static inline struct key *afs_file_key(struct file *file)
}
/*
- * Record of an outstanding read operation on a vnode.
- */
-struct afs_read {
- loff_t pos; /* Where to start reading */
- loff_t len; /* How much we're asking for */
- loff_t actual_len; /* How much we're actually getting */
- loff_t file_size; /* File size returned by server */
- struct key *key; /* The key to use to reissue the read */
- struct afs_vnode *vnode; /* The file being read into. */
- struct netfs_io_subrequest *subreq; /* Fscache helper read request this belongs to */
- afs_dataversion_t data_version; /* Version number returned by server */
- refcount_t usage;
- unsigned int call_debug_id;
- unsigned int nr_pages;
- int error;
- void (*done)(struct afs_read *);
- void (*cleanup)(struct afs_read *);
- struct iov_iter *iter; /* Iterator representing the buffer */
- struct iov_iter def_iter; /* Default iterator */
-};
-
-/*
* AFS superblock private data
* - there's one superblock per volume
*/
@@ -702,13 +687,14 @@ struct afs_vnode {
struct afs_file_status status; /* AFS status info for this file */
afs_dataversion_t invalid_before; /* Child dentries are invalid before this */
struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */
- struct mutex io_lock; /* Lock for serialising I/O on this mutex */
+ struct list_head io_lock_waiters; /* Threads waiting for the I/O lock */
struct rw_semaphore validate_lock; /* lock for validating this vnode */
struct rw_semaphore rmdir_lock; /* Lock for rmdir vs sillyrename */
struct key *silly_key; /* Silly rename key */
spinlock_t wb_lock; /* lock for wb_keys */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
+#define AFS_VNODE_IO_LOCK 0 /* Set if the I/O serialisation lock is held */
#define AFS_VNODE_UNSET 1 /* set if vnode attributes not yet set */
#define AFS_VNODE_DIR_VALID 2 /* Set if dir contents are valid */
#define AFS_VNODE_ZAP_DATA 3 /* set if vnode's data should be invalidated */
@@ -719,7 +705,9 @@ struct afs_vnode {
#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */
#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */
#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */
+#define AFS_VNODE_DIR_READ 11 /* Set if we've read a dir's contents */
+ struct folio_queue *directory; /* Directory contents */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
struct list_head granted_locks; /* locks granted on this file */
@@ -728,6 +716,7 @@ struct afs_vnode {
ktime_t locked_at; /* Time at which lock obtained */
enum afs_lock_state lock_state : 8;
afs_lock_type_t lock_type : 8;
+ unsigned int directory_size; /* Amount of space in ->directory */
/* outstanding callback notification on this file */
struct work_struct cb_work; /* Work for mmap'd files */
@@ -907,7 +896,7 @@ struct afs_operation {
bool new_negative;
} rename;
struct {
- struct afs_read *req;
+ struct netfs_io_subrequest *subreq;
} fetch;
struct {
afs_lock_type_t type;
@@ -959,6 +948,7 @@ struct afs_operation {
#define AFS_OPERATION_TRIED_ALL 0x0400 /* Set if we've tried all the fileservers */
#define AFS_OPERATION_RETRY_SERVER 0x0800 /* Set if we should retry the current server */
#define AFS_OPERATION_DIR_CONFLICT 0x1000 /* Set if we detected a 3rd-party dir change */
+#define AFS_OPERATION_ASYNC 0x2000 /* Set if should run asynchronously */
};
/*
@@ -983,6 +973,21 @@ static inline void afs_invalidate_cache(struct afs_vnode *vnode, unsigned int fl
i_size_read(&vnode->netfs.inode), flags);
}
+/*
+ * Directory iteration management.
+ *
+ * State carried across a search of one hash chain in a directory's content;
+ * set up by afs_dir_init_iter() for a particular name.
+ */
+struct afs_dir_iter {
+ struct afs_vnode *dvnode; /* The directory being searched */
+ union afs_xdr_dir_block *block; /* Currently mapped block (kunmap_local'd when done) */
+ struct folio_queue *fq; /* presumably the current segment of the content buffer - TODO confirm */
+ unsigned int fpos; /* presumably the file position at which fq starts - TODO confirm */
+ int fq_slot; /* presumably the folio slot within fq - TODO confirm */
+ unsigned int loop_check; /* Decremented per chain step; hitting 0 is treated as a loop */
+ u8 nr_slots; /* Number of dirent slots checked to fit in the block for a match */
+ u8 bucket; /* Hash bucket being searched (reported in debug output) */
+ unsigned int prev_entry; /* Entry visited before the current one in the chain */
+};
+
#include <trace/events/afs.h>
/*****************************************************************************/
@@ -1064,8 +1069,13 @@ extern const struct inode_operations afs_dir_inode_operations;
extern const struct address_space_operations afs_dir_aops;
extern const struct dentry_operations afs_fs_dentry_operations;
+ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file);
+ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+ __acquires(&dvnode->validate_lock);
extern void afs_d_release(struct dentry *);
extern void afs_check_for_remote_deletion(struct afs_operation *);
+int afs_single_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
/*
* dir_edit.c
@@ -1075,6 +1085,18 @@ extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *
extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
enum afs_edit_dir_reason why);
+void afs_mkdir_init_dir(struct afs_vnode *dvnode, struct afs_vnode *parent_vnode);
+
+/*
+ * dir_search.c
+ */
+unsigned int afs_dir_hash_name(const struct qstr *name);
+bool afs_dir_init_iter(struct afs_dir_iter *iter, const struct qstr *name);
+union afs_xdr_dir_block *afs_dir_find_block(struct afs_dir_iter *iter, size_t block);
+int afs_dir_search_bucket(struct afs_dir_iter *iter, const struct qstr *name,
+ struct afs_fid *_fid);
+int afs_dir_search(struct afs_vnode *dvnode, struct qstr *name,
+ struct afs_fid *_fid, afs_dataversion_t *_dir_version);
/*
* dir_silly.c
@@ -1099,24 +1121,17 @@ extern void afs_dynroot_depopulate(struct super_block *);
* file.c
*/
extern const struct address_space_operations afs_file_aops;
-extern const struct address_space_operations afs_symlink_aops;
extern const struct inode_operations afs_file_inode_operations;
extern const struct file_operations afs_file_operations;
+extern const struct afs_operation_ops afs_fetch_data_operation;
extern const struct netfs_request_ops afs_req_ops;
extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
extern void afs_put_wb_key(struct afs_wb_key *);
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
-extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
-extern struct afs_read *afs_alloc_read(gfp_t);
-extern void afs_put_read(struct afs_read *);
-
-static inline struct afs_read *afs_get_read(struct afs_read *req)
-{
- refcount_inc(&req->usage);
- return req;
-}
+void afs_fetch_data_async_rx(struct work_struct *work);
+void afs_fetch_data_immediate_cancel(struct afs_call *call);
/*
* flock.c
@@ -1168,6 +1183,7 @@ extern void afs_fs_store_acl(struct afs_operation *);
extern struct afs_operation *afs_alloc_operation(struct key *, struct afs_volume *);
extern int afs_put_operation(struct afs_operation *);
extern bool afs_begin_vnode_operation(struct afs_operation *);
+extern void afs_end_vnode_operation(struct afs_operation *op);
extern void afs_wait_for_operation(struct afs_operation *);
extern int afs_do_sync_operation(struct afs_operation *);
@@ -1205,6 +1221,10 @@ extern void afs_fs_probe_cleanup(struct afs_net *);
*/
extern const struct afs_operation_ops afs_fetch_status_operation;
+void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op);
+const char *afs_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *callback);
+int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen);
extern void afs_vnode_commit_status(struct afs_operation *, struct afs_vnode_param *);
extern int afs_fetch_status(struct afs_vnode *, struct key *, bool, afs_access_t *);
extern int afs_ilookup5_test_by_fid(struct inode *, void *);
@@ -1336,6 +1356,7 @@ extern void afs_charge_preallocation(struct work_struct *);
extern void afs_put_call(struct afs_call *);
void afs_deferred_put_call(struct afs_call *call);
void afs_make_call(struct afs_call *call, gfp_t gfp);
+void afs_deliver_to_call(struct afs_call *call);
void afs_wait_for_call_to_complete(struct afs_call *call);
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
const struct afs_call_type *,
@@ -1346,6 +1367,28 @@ extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
extern int afs_extract_data(struct afs_call *, bool);
extern int afs_protocol_error(struct afs_call *, enum afs_eproto_cause);
+static inline struct afs_call *afs_get_call(struct afs_call *call,
+ enum afs_call_trace why)
+{
+ int r;
+
+ __refcount_inc(&call->ref, &r);
+
+ trace_afs_call(call->debug_id, why, r + 1,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+ return call;
+}
+
+static inline void afs_see_call(struct afs_call *call, enum afs_call_trace why)
+{
+ int r = refcount_read(&call->ref);
+
+ trace_afs_call(call->debug_id, why, r,
+ atomic_read(&call->net->nr_outstanding_calls),
+ __builtin_return_address(0));
+}
+
static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *call,
gfp_t gfp)
{
@@ -1712,6 +1755,38 @@ static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
return -EIO;
}
+/*
+ * Set the callback promise on a vnode.
+ */
+static inline void afs_set_cb_promise(struct afs_vnode *vnode, time64_t expires_at,
+ enum afs_cb_promise_trace trace)
+{
+ atomic64_set(&vnode->cb_expires_at, expires_at);
+ trace_afs_cb_promise(vnode, trace);
+}
+
+/*
+ * Clear the callback promise on a vnode, returning true if it was promised.
+ */
+static inline bool afs_clear_cb_promise(struct afs_vnode *vnode,
+ enum afs_cb_promise_trace trace)
+{
+ trace_afs_cb_promise(vnode, trace);
+ return atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE;
+}
+
+/*
+ * Mark a directory as being invalid.
+ */
+static inline void afs_invalidate_dir(struct afs_vnode *dvnode,
+ enum afs_dir_invalid_trace trace)
+{
+ if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+ trace_afs_dir_invalid(dvnode, trace);
+ afs_stat_v(dvnode, n_inval);
+ }
+}
+
/*****************************************************************************/
/*
* debug tracing
diff --git a/fs/afs/main.c b/fs/afs/main.c
index a14f6013e316..1ae0067f772d 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -177,7 +177,7 @@ static int __init afs_init(void)
afs_wq = alloc_workqueue("afs", 0, 0);
if (!afs_wq)
goto error_afs_wq;
- afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
+ afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (!afs_async_calls)
goto error_async;
afs_lock_manager = alloc_workqueue("kafs_lockd", WQ_MEM_RECLAIM, 0);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 297487ee8323..507c25a5b2cb 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -30,7 +30,7 @@ const struct file_operations afs_mntpt_file_operations = {
const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
- .readlink = page_readlink,
+ .readlink = afs_readlink,
.getattr = afs_getattr,
};
@@ -118,9 +118,9 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
ctx->volnamesz = sizeof(afs_root_volume) - 1;
} else {
/* read the contents of the AFS special symlink */
- struct page *page;
+ DEFINE_DELAYED_CALL(cleanup);
+ const char *content;
loff_t size = i_size_read(d_inode(mntpt));
- char *buf;
if (src_as->cell)
ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt);
@@ -128,16 +128,16 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
if (size < 2 || size > PAGE_SIZE - 1)
return -EINVAL;
- page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
- if (IS_ERR(page))
- return PTR_ERR(page);
+ content = afs_get_link(mntpt, d_inode(mntpt), &cleanup);
+ if (IS_ERR(content)) {
+ do_delayed_call(&cleanup);
+ return PTR_ERR(content);
+ }
- buf = kmap(page);
ret = -EINVAL;
- if (buf[size - 1] == '.')
- ret = vfs_parse_fs_string(fc, "source", buf, size - 1);
- kunmap(page);
- put_page(page);
+ if (content[size - 1] == '.')
+ ret = vfs_parse_fs_string(fc, "source", content, size - 1);
+ do_delayed_call(&cleanup);
if (ret < 0)
return ret;
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index d612983d6f38..a1c24f589d9e 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -99,7 +99,7 @@ static bool afs_start_fs_iteration(struct afs_operation *op,
write_seqlock(&vnode->cb_lock);
ASSERTCMP(cb_server, ==, vnode->cb_server);
vnode->cb_server = NULL;
- if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE)
+ if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_rotate_server))
vnode->cb_break++;
write_sequnlock(&vnode->cb_lock);
}
@@ -583,7 +583,7 @@ selected_server:
if (vnode->cb_server != server) {
vnode->cb_server = server;
vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
- atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
+ afs_clear_cb_promise(vnode, afs_cb_promise_clear_server_change);
}
retry_server:
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 9f2a3bb56ec6..886416ea1d96 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -149,7 +149,8 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
call->net = net;
call->debug_id = atomic_inc_return(&rxrpc_debug_id);
refcount_set(&call->ref, 1);
- INIT_WORK(&call->async_work, afs_process_async_call);
+ INIT_WORK(&call->async_work, type->async_rx ?: afs_process_async_call);
+ INIT_WORK(&call->work, call->type->work);
INIT_WORK(&call->free_work, afs_deferred_free_worker);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->state_lock);
@@ -235,27 +236,12 @@ void afs_deferred_put_call(struct afs_call *call)
schedule_work(&call->free_work);
}
-static struct afs_call *afs_get_call(struct afs_call *call,
- enum afs_call_trace why)
-{
- int r;
-
- __refcount_inc(&call->ref, &r);
-
- trace_afs_call(call->debug_id, why, r + 1,
- atomic_read(&call->net->nr_outstanding_calls),
- __builtin_return_address(0));
- return call;
-}
-
/*
* Queue the call for actual work.
*/
static void afs_queue_call_work(struct afs_call *call)
{
if (call->type->work) {
- INIT_WORK(&call->work, call->type->work);
-
afs_get_call(call, afs_call_trace_work);
if (!queue_work(afs_wq, &call->work))
afs_put_call(call);
@@ -430,11 +416,16 @@ void afs_make_call(struct afs_call *call, gfp_t gfp)
return;
error_do_abort:
- if (ret != -ECONNABORTED) {
+ if (ret != -ECONNABORTED)
rxrpc_kernel_abort_call(call->net->socket, rxcall,
RX_USER_ABORT, ret,
afs_abort_send_data_error);
- } else {
+ if (call->async) {
+ afs_see_call(call, afs_call_trace_async_abort);
+ return;
+ }
+
+ if (ret == -ECONNABORTED) {
len = 0;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
rxrpc_kernel_recv_data(call->net->socket, rxcall,
@@ -445,8 +436,10 @@ error_do_abort:
call->error = ret;
trace_afs_call_done(call);
error_kill_call:
- if (call->type->done)
- call->type->done(call);
+ if (call->async)
+ afs_see_call(call, afs_call_trace_async_kill);
+ if (call->type->immediate_cancel)
+ call->type->immediate_cancel(call);
/* We need to dispose of the extra ref we grabbed for an async call.
* The call, however, might be queued on afs_async_calls and we need to
@@ -501,7 +494,7 @@ static void afs_log_error(struct afs_call *call, s32 remote_abort)
/*
* deliver messages to a call
*/
-static void afs_deliver_to_call(struct afs_call *call)
+void afs_deliver_to_call(struct afs_call *call)
{
enum afs_call_state state;
size_t len;
@@ -602,7 +595,6 @@ local_abort:
abort_code = 0;
call_complete:
afs_set_call_complete(call, ret, remote_abort);
- state = AFS_CALL_COMPLETE;
goto done;
}
@@ -803,6 +795,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
return -ENOTSUPP;
trace_afs_cb_call(call);
+ call->work.func = call->type->work;
/* pass responsibility for the remainer of this message off to the
* cache manager op */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index f3ba1c3e72f5..a9bee610674e 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -663,7 +663,7 @@ static void afs_i_init_once(void *_vnode)
memset(vnode, 0, sizeof(*vnode));
inode_init_once(&vnode->netfs.inode);
- mutex_init(&vnode->io_lock);
+ INIT_LIST_HEAD(&vnode->io_lock_waiters);
init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
spin_lock_init(&vnode->lock);
@@ -696,6 +696,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
vnode->volume = NULL;
vnode->lock_key = NULL;
vnode->permit_cache = NULL;
+ vnode->directory = NULL;
+ vnode->directory_size = 0;
vnode->flags = 1 << AFS_VNODE_UNSET;
vnode->lock_state = AFS_VNODE_LOCK_NONE;
diff --git a/fs/afs/validation.c b/fs/afs/validation.c
index bef8af12ebe2..0ba8336c9025 100644
--- a/fs/afs/validation.c
+++ b/fs/afs/validation.c
@@ -120,22 +120,31 @@
bool afs_check_validity(const struct afs_vnode *vnode)
{
const struct afs_volume *volume = vnode->volume;
+ enum afs_vnode_invalid_trace trace = afs_vnode_valid_trace;
+ time64_t cb_expires_at = atomic64_read(&vnode->cb_expires_at);
time64_t deadline = ktime_get_real_seconds() + 10;
if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
return true;
- if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
- atomic64_read(&vnode->cb_expires_at) <= deadline ||
- volume->cb_expires_at <= deadline ||
- vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot) ||
- vnode->cb_scrub != atomic_read(&volume->cb_scrub) ||
- test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- _debug("inval");
- return false;
- }
-
- return true;
+ if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break))
+ trace = afs_vnode_invalid_trace_cb_v_break;
+ else if (cb_expires_at == AFS_NO_CB_PROMISE)
+ trace = afs_vnode_invalid_trace_no_cb_promise;
+ else if (cb_expires_at <= deadline)
+ trace = afs_vnode_invalid_trace_expired;
+ else if (volume->cb_expires_at <= deadline)
+ trace = afs_vnode_invalid_trace_vol_expired;
+ else if (vnode->cb_ro_snapshot != atomic_read(&volume->cb_ro_snapshot))
+ trace = afs_vnode_invalid_trace_cb_ro_snapshot;
+ else if (vnode->cb_scrub != atomic_read(&volume->cb_scrub))
+ trace = afs_vnode_invalid_trace_cb_scrub;
+ else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
+ trace = afs_vnode_invalid_trace_zap_data;
+ else
+ return true;
+ trace_afs_vnode_invalid(vnode, trace);
+ return false;
}
/*
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index cac75f89b64a..adc617a82a86 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -370,6 +370,7 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
.name = "VL.GetCapabilities",
.op = afs_VL_GetCapabilities,
.deliver = afs_deliver_vl_get_capabilities,
+ .immediate_cancel = afs_vlserver_probe_result,
.done = afs_vlserver_probe_result,
.destructor = afs_destroy_vl_get_capabilities,
};
diff --git a/fs/afs/write.c b/fs/afs/write.c
index ccb6aa8027c5..18b0a9f1615e 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -182,8 +182,8 @@ void afs_issue_write(struct netfs_io_subrequest *subreq)
*/
void afs_begin_writeback(struct netfs_io_request *wreq)
{
- afs_get_writeback_key(wreq);
- wreq->io_streams[0].avail = true;
+ if (S_ISREG(wreq->inode->i_mode))
+ afs_get_writeback_key(wreq);
}
/*
@@ -196,6 +196,18 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st
list_first_entry(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
+ switch (wreq->origin) {
+ case NETFS_READAHEAD:
+ case NETFS_READPAGE:
+ case NETFS_READ_GAPS:
+ case NETFS_READ_SINGLE:
+ case NETFS_READ_FOR_WRITE:
+ case NETFS_DIO_READ:
+ return;
+ default:
+ break;
+ }
+
switch (subreq->error) {
case -EACCES:
case -EPERM:
diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h
index 8ca868164507..cc5f143d21a3 100644
--- a/fs/afs/xdr_fs.h
+++ b/fs/afs/xdr_fs.h
@@ -88,7 +88,7 @@ union afs_xdr_dir_block {
struct {
struct afs_xdr_dir_hdr hdr;
- u8 alloc_ctrs[AFS_DIR_MAX_BLOCKS];
+ u8 alloc_ctrs[AFS_DIR_BLOCKS_WITH_CTR];
__be16 hashtable[AFS_DIR_HASHTBL_SIZE];
} meta;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 024227aba4cd..f57c089f26ee 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -352,19 +352,19 @@ static int yfs_deliver_status_and_volsync(struct afs_call *call)
static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
{
struct afs_operation *op = call->op;
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
const __be32 *bp;
size_t count_before;
int ret;
_enter("{%u,%zu, %zu/%llu}",
call->unmarshall, call->iov_len, iov_iter_count(call->iter),
- req->actual_len);
+ call->remaining);
switch (call->unmarshall) {
case 0:
- req->actual_len = 0;
+ call->remaining = 0;
afs_extract_to_tmp64(call);
call->unmarshall++;
fallthrough;
@@ -379,42 +379,39 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
if (ret < 0)
return ret;
- req->actual_len = be64_to_cpu(call->tmp64);
- _debug("DATA length: %llu", req->actual_len);
+ call->remaining = be64_to_cpu(call->tmp64);
+ _debug("DATA length: %llu", call->remaining);
- if (req->actual_len == 0)
+ if (call->remaining == 0)
goto no_more_data;
- call->iter = req->iter;
- call->iov_len = min(req->actual_len, req->len);
+ call->iter = &subreq->io_iter;
+ call->iov_len = min(call->remaining, subreq->len - subreq->transferred);
call->unmarshall++;
fallthrough;
/* extract the returned data */
case 2:
count_before = call->iov_len;
- _debug("extract data %zu/%llu", count_before, req->actual_len);
+ _debug("extract data %zu/%llu", count_before, call->remaining);
ret = afs_extract_data(call, true);
- if (req->subreq) {
- req->subreq->transferred += count_before - call->iov_len;
- netfs_read_subreq_progress(req->subreq, false);
- }
+ subreq->transferred += count_before - call->iov_len;
if (ret < 0)
return ret;
call->iter = &call->def_iter;
- if (req->actual_len <= req->len)
+ if (call->remaining)
goto no_more_data;
/* Discard any excess data the server gave us */
- afs_extract_discard(call, req->actual_len - req->len);
+ afs_extract_discard(call, call->remaining);
call->unmarshall = 3;
fallthrough;
case 3:
_debug("extract discard %zu/%llu",
- iov_iter_count(call->iter), req->actual_len - req->len);
+ iov_iter_count(call->iter), call->remaining);
ret = afs_extract_data(call, true);
if (ret < 0)
@@ -439,8 +436,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
xdr_decode_YFSCallBack(&bp, call, &vp->scb);
xdr_decode_YFSVolSync(&bp, &op->volsync);
- req->data_version = vp->scb.status.data_version;
- req->file_size = vp->scb.status.size;
+ if (subreq->start + subreq->transferred >= vp->scb.status.size)
+ __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
call->unmarshall++;
fallthrough;
@@ -459,7 +456,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
static const struct afs_call_type yfs_RXYFSFetchData64 = {
.name = "YFS.FetchData64",
.op = yfs_FS_FetchData64,
+ .async_rx = afs_fetch_data_async_rx,
.deliver = yfs_deliver_fs_fetch_data64,
+ .immediate_cancel = afs_fetch_data_immediate_cancel,
.destructor = afs_flat_call_destructor,
};
@@ -468,14 +467,15 @@ static const struct afs_call_type yfs_RXYFSFetchData64 = {
*/
void yfs_fs_fetch_data(struct afs_operation *op)
{
+ struct netfs_io_subrequest *subreq = op->fetch.subreq;
struct afs_vnode_param *vp = &op->file[0];
- struct afs_read *req = op->fetch.req;
struct afs_call *call;
__be32 *bp;
- _enter(",%x,{%llx:%llu},%llx,%llx",
+ _enter(",%x,{%llx:%llu},%llx,%zx",
key_serial(op->key), vp->fid.vid, vp->fid.vnode,
- req->pos, req->len);
+ subreq->start + subreq->transferred,
+ subreq->len - subreq->transferred);
call = afs_alloc_flat_call(op->net, &yfs_RXYFSFetchData64,
sizeof(__be32) * 2 +
@@ -487,15 +487,16 @@ void yfs_fs_fetch_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
- req->call_debug_id = call->debug_id;
+ if (op->flags & AFS_OPERATION_ASYNC)
+ call->async = true;
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_u32(bp, YFSFETCHDATA64);
bp = xdr_encode_u32(bp, 0); /* RPC flags */
bp = xdr_encode_YFSFid(bp, &vp->fid);
- bp = xdr_encode_u64(bp, req->pos);
- bp = xdr_encode_u64(bp, req->len);
+ bp = xdr_encode_u64(bp, subreq->start + subreq->transferred);
+ bp = xdr_encode_u64(bp, subreq->len - subreq->transferred);
yfs_check_req(call, bp);
call->fid = vp->fid;
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 6a821a959b59..92058ae43488 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -13,6 +13,7 @@
#include <linux/falloc.h>
#include <linux/sched/mm.h>
#include <trace/events/fscache.h>
+#include <trace/events/netfs.h>
#include "internal.h"
struct cachefiles_kiocb {
@@ -366,6 +367,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
if (term_func)
term_func(term_func_priv, -ENOBUFS, false);
+ trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite);
return -ENOBUFS;
}
@@ -695,6 +697,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
iov_iter_truncate(&subreq->io_iter, len);
}
+ trace_netfs_sreq(subreq, netfs_sreq_trace_cache_prepare);
cachefiles_begin_secure(cache, &saved_cred);
ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
&start, &len, len, true);
@@ -704,6 +707,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
return;
}
+ trace_netfs_sreq(subreq, netfs_sreq_trace_cache_write);
cachefiles_write(&subreq->rreq->cache_resources,
subreq->start, &subreq->io_iter,
netfs_write_subrequest_terminated, subreq);
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 7c6f260a3be5..52383b1d0ba6 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -77,6 +77,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object)
trace_cachefiles_vfs_error(object, file_inode(file), ret,
cachefiles_trace_setxattr_error);
trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+ be64_to_cpup((__be64 *)buf->data),
buf->content,
cachefiles_coherency_set_fail);
if (ret != -ENOMEM)
@@ -85,6 +86,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object)
"Failed to set xattr with error %d", ret);
} else {
trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+ be64_to_cpup((__be64 *)buf->data),
buf->content,
cachefiles_coherency_set_ok);
}
@@ -126,7 +128,10 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
object,
"Failed to read aux with error %zd", xlen);
why = cachefiles_coherency_check_xattr;
- } else if (buf->type != CACHEFILES_COOKIE_TYPE_DATA) {
+ goto out;
+ }
+
+ if (buf->type != CACHEFILES_COOKIE_TYPE_DATA) {
why = cachefiles_coherency_check_type;
} else if (memcmp(buf->data, p, len) != 0) {
why = cachefiles_coherency_check_aux;
@@ -141,7 +146,9 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file
ret = 0;
}
+out:
trace_cachefiles_coherency(object, file_inode(file)->i_ino,
+ be64_to_cpup((__be64 *)buf->data),
buf->content, why);
kfree(buf);
return ret;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 85936f6d2bf7..f5224a566b69 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -223,10 +223,13 @@ static void finish_netfs_read(struct ceph_osd_request *req)
subreq->len, i_size_read(req->r_inode));
/* no object means success but no data */
- if (err == -ENOENT)
+ if (err == -ENOENT) {
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
err = 0;
- else if (err == -EBLOCKLISTED)
+ } else if (err == -EBLOCKLISTED) {
fsc->blocklisted = true;
+ }
if (err >= 0) {
if (sparse && err > 0)
@@ -242,6 +245,8 @@ static void finish_netfs_read(struct ceph_osd_request *req)
if (err > subreq->len)
err = subreq->len;
}
+ if (err > 0)
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
}
if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
@@ -253,8 +258,9 @@ static void finish_netfs_read(struct ceph_osd_request *req)
subreq->transferred = err;
err = 0;
}
+ subreq->error = err;
trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
- netfs_read_subreq_terminated(subreq, err, false);
+ netfs_read_subreq_terminated(subreq);
iput(req->r_inode);
ceph_dec_osd_stopping_blocker(fsc->mdsc);
}
@@ -314,7 +320,9 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
ceph_mdsc_put_request(req);
out:
- netfs_read_subreq_terminated(subreq, err, false);
+ subreq->error = err;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
+ netfs_read_subreq_terminated(subreq);
return true;
}
@@ -426,8 +434,10 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
ceph_osdc_start_request(req->r_osdc, req);
out:
ceph_osdc_put_request(req);
- if (err)
- netfs_read_subreq_terminated(subreq, err, false);
+ if (err) {
+ subreq->error = err;
+ netfs_read_subreq_terminated(subreq);
+ }
doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err);
}
diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index d08b0bfb6756..b43188d64bd8 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -13,8 +13,11 @@ netfs-y := \
read_collect.o \
read_pgpriv2.o \
read_retry.o \
+ read_single.o \
+ rolling_buffer.o \
write_collect.o \
- write_issue.o
+ write_issue.o \
+ write_retry.o
netfs-$(CONFIG_NETFS_STATS) += stats.o
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 4dc9b8286355..f761d44b3436 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -64,37 +64,6 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in
}
/*
- * Decant the list of folios to read into a rolling buffer.
- */
-static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
- struct folio_queue *folioq,
- struct folio_batch *put_batch)
-{
- unsigned int order, nr;
- size_t size = 0;
-
- nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
- ARRAY_SIZE(folioq->vec.folios));
- folioq->vec.nr = nr;
- for (int i = 0; i < nr; i++) {
- struct folio *folio = folioq_folio(folioq, i);
-
- trace_netfs_folio(folio, netfs_folio_trace_read);
- order = folio_order(folio);
- folioq->orders[i] = order;
- size += PAGE_SIZE << order;
-
- if (!folio_batch_add(put_batch, folio))
- folio_batch_release(put_batch);
- }
-
- for (int i = nr; i < folioq_nr_slots(folioq); i++)
- folioq_clear(folioq, i);
-
- return size;
-}
-
-/*
* netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
* @subreq: The subrequest to be set up
*
@@ -128,19 +97,12 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
folio_batch_init(&put_batch);
while (rreq->submitted < subreq->start + rsize) {
- struct folio_queue *tail = rreq->buffer_tail, *new;
- size_t added;
-
- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
- netfs_stat(&netfs_n_folioq);
- folioq_init(new);
- new->prev = tail;
- tail->next = new;
- rreq->buffer_tail = new;
- added = netfs_load_buffer_from_ra(rreq, new, &put_batch);
- rreq->iter.count += added;
+ ssize_t added;
+
+ added = rolling_buffer_load_from_ra(&rreq->buffer, rreq->ractl,
+ &put_batch);
+ if (added < 0)
+ return added;
rreq->submitted += added;
}
folio_batch_release(&put_batch);
@@ -148,7 +110,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
subreq->len = rsize;
if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
- size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
+ size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
rreq->io_streams[0].sreq_max_segs);
if (limit < rsize) {
@@ -157,20 +119,10 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
}
}
- subreq->io_iter = rreq->iter;
-
- if (iov_iter_is_folioq(&subreq->io_iter)) {
- if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
- subreq->io_iter.folioq = subreq->io_iter.folioq->next;
- subreq->io_iter.folioq_slot = 0;
- }
- subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
- subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
- subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
- }
+ subreq->io_iter = rreq->buffer.iter;
iov_iter_truncate(&subreq->io_iter, subreq->len);
- iov_iter_advance(&rreq->iter, subreq->len);
+ rolling_buffer_advance(&rreq->buffer, subreq->len);
return subreq->len;
}
@@ -179,25 +131,14 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rr
loff_t i_size)
{
struct netfs_cache_resources *cres = &rreq->cache_resources;
+ enum netfs_io_source source;
if (!cres->ops)
return NETFS_DOWNLOAD_FROM_SERVER;
- return cres->ops->prepare_read(subreq, i_size);
-}
-
-static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
- bool was_async)
-{
- struct netfs_io_subrequest *subreq = priv;
-
- if (transferred_or_error < 0) {
- netfs_read_subreq_terminated(subreq, transferred_or_error, was_async);
- return;
- }
+ source = cres->ops->prepare_read(subreq, i_size);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+ return source;
- if (transferred_or_error > 0)
- subreq->transferred += transferred_or_error;
- netfs_read_subreq_terminated(subreq, 0, was_async);
}
/*
@@ -214,6 +155,47 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
netfs_cache_read_terminated, subreq);
}
+static void netfs_issue_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+
+ /* We add to the end of the list whilst the collector may be walking
+ * the list. The collector only goes nextwards and uses the lock to
+ * remove entries off of the front.
+ */
+ spin_lock(&rreq->lock);
+ list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
+ stream->front = subreq;
+ if (!stream->active) {
+ stream->collected_to = stream->front->start;
+ /* Store list pointers before active flag */
+ smp_store_release(&stream->active, true);
+ }
+ }
+
+ spin_unlock(&rreq->lock);
+
+ switch (subreq->source) {
+ case NETFS_DOWNLOAD_FROM_SERVER:
+ rreq->netfs_ops->issue_read(subreq);
+ break;
+ case NETFS_READ_FROM_CACHE:
+ netfs_read_cache_to_pagecache(rreq, subreq);
+ break;
+ default:
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ subreq->error = 0;
+ iov_iter_zero(subreq->len, &subreq->io_iter);
+ subreq->transferred = subreq->len;
+ netfs_read_subreq_terminated(subreq);
+ break;
+ }
+}
+
/*
* Perform a read to the pagecache from a series of sources of different types,
* slicing up the region to be read according to available cache blocks and
@@ -226,11 +208,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
ssize_t size = rreq->len;
int ret = 0;
- atomic_inc(&rreq->nr_outstanding);
-
do {
struct netfs_io_subrequest *subreq;
- enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
+ enum netfs_io_source source = NETFS_SOURCE_UNKNOWN;
ssize_t slice;
subreq = netfs_alloc_subrequest(rreq);
@@ -242,20 +222,14 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
subreq->start = start;
subreq->len = size;
- atomic_inc(&rreq->nr_outstanding);
- spin_lock_bh(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &rreq->subrequests);
- subreq->prev_donated = rreq->prev_donated;
- rreq->prev_donated = 0;
- trace_netfs_sreq(subreq, netfs_sreq_trace_added);
- spin_unlock_bh(&rreq->lock);
-
source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
subreq->source = source;
if (source == NETFS_DOWNLOAD_FROM_SERVER) {
unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
size_t len = subreq->len;
+ if (unlikely(rreq->origin == NETFS_READ_SINGLE))
+ zp = rreq->i_size;
if (subreq->start >= zp) {
subreq->source = source = NETFS_FILL_WITH_ZEROES;
goto fill_with_zeroes;
@@ -275,17 +249,18 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
- if (ret < 0)
- goto prep_failed;
+ if (ret < 0) {
+ subreq->error = ret;
+ /* Not queued - release both refs. */
+ netfs_put_subrequest(subreq, false,
+ netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, false,
+ netfs_sreq_trace_put_cancel);
+ break;
+ }
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
}
-
- slice = netfs_prepare_read_iterator(subreq);
- if (slice < 0)
- goto prep_iter_failed;
-
- rreq->netfs_ops->issue_read(subreq);
- goto done;
+ goto issue;
}
fill_with_zeroes:
@@ -293,94 +268,50 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
subreq->source = NETFS_FILL_WITH_ZEROES;
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
netfs_stat(&netfs_n_rh_zero);
- slice = netfs_prepare_read_iterator(subreq);
- if (slice < 0)
- goto prep_iter_failed;
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- netfs_read_subreq_terminated(subreq, 0, false);
- goto done;
+ goto issue;
}
if (source == NETFS_READ_FROM_CACHE) {
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
- slice = netfs_prepare_read_iterator(subreq);
- if (slice < 0)
- goto prep_iter_failed;
- netfs_read_cache_to_pagecache(rreq, subreq);
- goto done;
+ goto issue;
}
pr_err("Unexpected read source %u\n", source);
WARN_ON_ONCE(1);
break;
- prep_iter_failed:
- ret = slice;
- prep_failed:
- subreq->error = ret;
- atomic_dec(&rreq->nr_outstanding);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
- break;
-
- done:
+ issue:
+ slice = netfs_prepare_read_iterator(subreq);
+ if (slice < 0) {
+ ret = slice;
+ subreq->error = ret;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
+ /* Not queued - release both refs. */
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ break;
+ }
size -= slice;
start += slice;
+ if (size <= 0) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ }
+
+ netfs_issue_read(rreq, subreq);
cond_resched();
} while (size > 0);
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq, false);
+ if (unlikely(size > 0)) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ netfs_wake_read_collector(rreq);
+ }
/* Defer error return as we may need to wait for outstanding I/O. */
cmpxchg(&rreq->error, 0, ret);
}
-/*
- * Wait for the read operation to complete, successfully or otherwise.
- */
-static int netfs_wait_for_read(struct netfs_io_request *rreq)
-{
- int ret;
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
- wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
- ret = rreq->error;
- if (ret == 0 && rreq->submitted < rreq->len) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
- }
-
- return ret;
-}
-
-/*
- * Set up the initial folioq of buffer folios in the rolling buffer and set the
- * iterator to refer to it.
- */
-static int netfs_prime_buffer(struct netfs_io_request *rreq)
-{
- struct folio_queue *folioq;
- struct folio_batch put_batch;
- size_t added;
-
- folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
- if (!folioq)
- return -ENOMEM;
- netfs_stat(&netfs_n_folioq);
- folioq_init(folioq);
- rreq->buffer = folioq;
- rreq->buffer_tail = folioq;
- rreq->submitted = rreq->start;
- iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, 0);
-
- folio_batch_init(&put_batch);
- added = netfs_load_buffer_from_ra(rreq, folioq, &put_batch);
- folio_batch_release(&put_batch);
- rreq->iter.count += added;
- rreq->submitted += added;
- return 0;
-}
-
/**
* netfs_readahead - Helper to manage a read request
* @ractl: The description of the readahead request
@@ -409,6 +340,8 @@ void netfs_readahead(struct readahead_control *ractl)
if (IS_ERR(rreq))
return;
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
+
ret = netfs_begin_cache_read(rreq, ictx);
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
goto cleanup_free;
@@ -420,7 +353,8 @@ void netfs_readahead(struct readahead_control *ractl)
netfs_rreq_expand(rreq, ractl);
rreq->ractl = ractl;
- if (netfs_prime_buffer(rreq) < 0)
+ rreq->submitted = rreq->start;
+ if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
goto cleanup_free;
netfs_read_to_pagecache(rreq);
@@ -436,23 +370,18 @@ EXPORT_SYMBOL(netfs_readahead);
/*
* Create a rolling buffer with a single occupying folio.
*/
-static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio)
+static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct folio *folio,
+ unsigned int rollbuf_flags)
{
- struct folio_queue *folioq;
+ ssize_t added;
- folioq = kmalloc(sizeof(*folioq), GFP_KERNEL);
- if (!folioq)
+ if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0)
return -ENOMEM;
- netfs_stat(&netfs_n_folioq);
- folioq_init(folioq);
- folioq_append(folioq, folio);
- BUG_ON(folioq_folio(folioq, 0) != folio);
- BUG_ON(folioq_folio_order(folioq, 0) != folio_order(folio));
- rreq->buffer = folioq;
- rreq->buffer_tail = folioq;
- rreq->submitted = rreq->start + rreq->len;
- iov_iter_folio_queue(&rreq->iter, ITER_DEST, folioq, 0, 0, rreq->len);
+ added = rolling_buffer_append(&rreq->buffer, folio, rollbuf_flags);
+ if (added < 0)
+ return added;
+ rreq->submitted = rreq->start + added;
rreq->ractl = (struct readahead_control *)1UL;
return 0;
}
@@ -520,7 +449,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
}
if (to < flen)
bvec_set_folio(&bvec[i++], folio, flen - to, to);
- iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
+ iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len);
rreq->submitted = rreq->start + flen;
netfs_read_to_pagecache(rreq);
@@ -529,7 +458,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
folio_put(sink);
ret = netfs_wait_for_read(rreq);
- if (ret == 0) {
+ if (ret >= 0) {
flush_dcache_folio(folio);
folio_mark_uptodate(folio);
}
@@ -588,7 +517,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
/* Set up the output buffer */
- ret = netfs_create_singular_buffer(rreq, folio);
+ ret = netfs_create_singular_buffer(rreq, folio, 0);
if (ret < 0)
goto discard;
@@ -745,7 +674,7 @@ retry:
trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);
/* Set up the output buffer */
- ret = netfs_create_singular_buffer(rreq, folio);
+ ret = netfs_create_singular_buffer(rreq, folio, 0);
if (ret < 0)
goto error_put;
@@ -810,15 +739,14 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
/* Set up the output buffer */
- ret = netfs_create_singular_buffer(rreq, folio);
+ ret = netfs_create_singular_buffer(rreq, folio, NETFS_ROLLBUF_PAGECACHE_MARK);
if (ret < 0)
goto error_put;
- folioq_mark2(rreq->buffer, 0);
netfs_read_to_pagecache(rreq);
ret = netfs_wait_for_read(rreq);
netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
- return ret;
+ return ret < 0 ? ret : 0;
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index b1a66a6e6bc2..0bf3c2f5a710 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -25,7 +25,7 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
subreq->len = rsize;
if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
- size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
+ size_t limit = netfs_limit_iter(&rreq->buffer.iter, 0, rsize,
rreq->io_streams[0].sreq_max_segs);
if (limit < rsize) {
@@ -36,9 +36,9 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
- subreq->io_iter = rreq->iter;
+ subreq->io_iter = rreq->buffer.iter;
iov_iter_truncate(&subreq->io_iter, subreq->len);
- iov_iter_advance(&rreq->iter, subreq->len);
+ iov_iter_advance(&rreq->buffer.iter, subreq->len);
}
/*
@@ -47,12 +47,11 @@ static void netfs_prepare_dio_read_iterator(struct netfs_io_subrequest *subreq)
*/
static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned long long start = rreq->start;
ssize_t size = rreq->len;
int ret = 0;
- atomic_set(&rreq->nr_outstanding, 1);
-
do {
struct netfs_io_subrequest *subreq;
ssize_t slice;
@@ -67,19 +66,25 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
subreq->start = start;
subreq->len = size;
- atomic_inc(&rreq->nr_outstanding);
- spin_lock_bh(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &rreq->subrequests);
- subreq->prev_donated = rreq->prev_donated;
- rreq->prev_donated = 0;
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+
+ spin_lock(&rreq->lock);
+ list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
+ stream->front = subreq;
+ if (!stream->active) {
+ stream->collected_to = stream->front->start;
+ /* Store list pointers before active flag */
+ smp_store_release(&stream->active, true);
+ }
+ }
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
- spin_unlock_bh(&rreq->lock);
+ spin_unlock(&rreq->lock);
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
- atomic_dec(&rreq->nr_outstanding);
netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
break;
}
@@ -87,20 +92,32 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
netfs_prepare_dio_read_iterator(subreq);
slice = subreq->len;
- rreq->netfs_ops->issue_read(subreq);
-
size -= slice;
start += slice;
rreq->submitted += slice;
+ if (size <= 0) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ }
+
+ rreq->netfs_ops->issue_read(subreq);
+ if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ netfs_wait_for_pause(rreq);
+ if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
+ break;
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
break;
cond_resched();
} while (size > 0);
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq, false);
+ if (unlikely(size > 0)) {
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ netfs_wake_read_collector(rreq);
+ }
+
return ret;
}
@@ -133,21 +150,10 @@ static int netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
goto out;
}
- if (sync) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
- wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
-
- ret = rreq->error;
- if (ret == 0 && rreq->submitted < rreq->len &&
- rreq->origin != NETFS_DIO_READ) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
- }
- } else {
+ if (sync)
+ ret = netfs_wait_for_read(rreq);
+ else
ret = -EIOCBQUEUED;
- }
-
out:
_leave(" = %d", ret);
return ret;
@@ -199,15 +205,15 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
* the request.
*/
if (user_backed_iter(iter)) {
- ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
+ ret = netfs_extract_user_iter(iter, rreq->len, &rreq->buffer.iter, 0);
if (ret < 0)
goto out;
- rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
+ rreq->direct_bv = (struct bio_vec *)rreq->buffer.iter.bvec;
rreq->direct_bv_count = ret;
rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
- rreq->len = iov_iter_count(&rreq->iter);
+ rreq->len = iov_iter_count(&rreq->buffer.iter);
} else {
- rreq->iter = *iter;
+ rreq->buffer.iter = *iter;
rreq->len = orig_count;
rreq->direct_bv_unpin = false;
iov_iter_advance(iter, orig_count);
@@ -215,8 +221,10 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
// TODO: Set up bounce buffer if needed
- if (!sync)
+ if (!sync) {
rreq->iocb = iocb;
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags);
+ }
ret = netfs_unbuffered_read(rreq, sync);
if (ret < 0)
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 173e8b5e6a93..eded8afaa60b 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -68,19 +68,17 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
* request.
*/
if (async || user_backed_iter(iter)) {
- n = netfs_extract_user_iter(iter, len, &wreq->iter, 0);
+ n = netfs_extract_user_iter(iter, len, &wreq->buffer.iter, 0);
if (n < 0) {
ret = n;
goto out;
}
- wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
+ wreq->direct_bv = (struct bio_vec *)wreq->buffer.iter.bvec;
wreq->direct_bv_count = n;
wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
} else {
- wreq->iter = *iter;
+ wreq->buffer.iter = *iter;
}
-
- wreq->io_iter = wreq->iter;
}
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
@@ -92,7 +90,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
if (async)
wreq->iocb = iocb;
- wreq->len = iov_iter_count(&wreq->io_iter);
+ wreq->len = iov_iter_count(&wreq->buffer.iter);
wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index c562aec3b483..eb76f98c894b 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -23,6 +23,7 @@
/*
* buffered_read.c
*/
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
@@ -58,11 +59,8 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
/*
* misc.c
*/
-struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq);
-int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
- bool needs_put);
-struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq);
-void netfs_clear_buffer(struct netfs_io_request *rreq);
+struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq,
+ enum netfs_folioq_trace trace);
void netfs_reset_iter(struct netfs_io_subrequest *subreq);
/*
@@ -84,20 +82,27 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), what);
}
+static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq,
+ enum netfs_sreq_ref_trace what)
+{
+ trace_netfs_sreq_ref(subreq->rreq->debug_id, subreq->debug_index,
+ refcount_read(&subreq->ref), what);
+}
+
/*
* read_collect.c
*/
-void netfs_read_termination_worker(struct work_struct *work);
-void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async);
+void netfs_read_collection_worker(struct work_struct *work);
+void netfs_wake_read_collector(struct netfs_io_request *rreq);
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
+void netfs_wait_for_pause(struct netfs_io_request *rreq);
/*
* read_pgpriv2.c
*/
-void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
- struct netfs_io_request *rreq,
- struct folio_queue *folioq,
- int slot);
-void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq);
+void netfs_pgpriv2_copy_to_cache(struct netfs_io_request *rreq, struct folio *folio);
+void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq);
bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq);
/*
@@ -113,6 +118,7 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq);
extern atomic_t netfs_n_rh_dio_read;
extern atomic_t netfs_n_rh_readahead;
extern atomic_t netfs_n_rh_read_folio;
+extern atomic_t netfs_n_rh_read_single;
extern atomic_t netfs_n_rh_rreq;
extern atomic_t netfs_n_rh_sreq;
extern atomic_t netfs_n_rh_download;
@@ -181,9 +187,9 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
struct iov_iter *source);
void netfs_issue_write(struct netfs_io_request *wreq,
struct netfs_io_stream *stream);
-int netfs_advance_write(struct netfs_io_request *wreq,
- struct netfs_io_stream *stream,
- loff_t start, size_t len, bool to_eof);
+size_t netfs_advance_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start, size_t len, bool to_eof);
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
struct folio *folio, size_t copied, bool to_page_end,
@@ -193,6 +199,11 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
/*
+ * write_retry.c
+ */
+void netfs_retry_writes(struct netfs_io_request *wreq);
+
+/*
* Miscellaneous functions.
*/
static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 6c7be1377ee0..4e3e62040831 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -37,9 +37,11 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READAHEAD] = "RA",
[NETFS_READPAGE] = "RP",
[NETFS_READ_GAPS] = "RG",
+ [NETFS_READ_SINGLE] = "R1",
[NETFS_READ_FOR_WRITE] = "RW",
[NETFS_DIO_READ] = "DR",
[NETFS_WRITEBACK] = "WB",
+ [NETFS_WRITEBACK_SINGLE] = "W1",
[NETFS_WRITETHROUGH] = "WT",
[NETFS_UNBUFFERED_WRITE] = "UW",
[NETFS_DIO_WRITE] = "DW",
@@ -69,7 +71,7 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
refcount_read(&rreq->ref),
rreq->flags,
rreq->error,
- atomic_read(&rreq->nr_outstanding),
+ 0,
rreq->start, rreq->submitted, rreq->len);
seq_putc(m, '\n');
return 0;
@@ -116,7 +118,7 @@ static int __init netfs_init(void)
goto error_reqpool;
netfs_subrequest_slab = kmem_cache_create("netfs_subrequest",
- sizeof(struct netfs_io_subrequest), 0,
+ sizeof(struct netfs_io_subrequest) + 16, 0,
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!netfs_subrequest_slab)
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 78fe5796b2b2..7099aa07737a 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -8,113 +8,101 @@
#include <linux/swap.h>
#include "internal.h"
-/*
- * Make sure there's space in the rolling queue.
+/**
+ * netfs_alloc_folioq_buffer - Allocate buffer space into a folio queue
+ * @mapping: Address space to set on the folio (or NULL).
+ * @_buffer: Pointer to the folio queue to add to (may point to a NULL; updated).
+ * @_cur_size: Current size of the buffer (updated).
+ * @size: Target size of the buffer.
+ * @gfp: The allocation constraints.
*/
-struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq)
+int netfs_alloc_folioq_buffer(struct address_space *mapping,
+ struct folio_queue **_buffer,
+ size_t *_cur_size, ssize_t size, gfp_t gfp)
{
- struct folio_queue *tail = rreq->buffer_tail, *prev;
- unsigned int prev_nr_slots = 0;
-
- if (WARN_ON_ONCE(!rreq->buffer && tail) ||
- WARN_ON_ONCE(rreq->buffer && !tail))
- return ERR_PTR(-EIO);
-
- prev = tail;
- if (prev) {
- if (!folioq_full(tail))
- return tail;
- prev_nr_slots = folioq_nr_slots(tail);
- }
-
- tail = kmalloc(sizeof(*tail), GFP_NOFS);
- if (!tail)
- return ERR_PTR(-ENOMEM);
- netfs_stat(&netfs_n_folioq);
- folioq_init(tail);
- tail->prev = prev;
- if (prev)
- /* [!] NOTE: After we set prev->next, the consumer is entirely
- * at liberty to delete prev.
- */
- WRITE_ONCE(prev->next, tail);
-
- rreq->buffer_tail = tail;
- if (!rreq->buffer) {
- rreq->buffer = tail;
- iov_iter_folio_queue(&rreq->io_iter, ITER_SOURCE, tail, 0, 0, 0);
- } else {
- /* Make sure we don't leave the master iterator pointing to a
- * block that might get immediately consumed.
- */
- if (rreq->io_iter.folioq == prev &&
- rreq->io_iter.folioq_slot == prev_nr_slots) {
- rreq->io_iter.folioq = tail;
- rreq->io_iter.folioq_slot = 0;
+ struct folio_queue *tail = *_buffer, *p;
+
+ size = round_up(size, PAGE_SIZE);
+ if (*_cur_size >= size)
+ return 0;
+
+ if (tail)
+ while (tail->next)
+ tail = tail->next;
+
+ do {
+ struct folio *folio;
+ int order = 0, slot;
+
+ if (!tail || folioq_full(tail)) {
+ p = netfs_folioq_alloc(0, GFP_NOFS, netfs_trace_folioq_alloc_buffer);
+ if (!p)
+ return -ENOMEM;
+ if (tail) {
+ tail->next = p;
+ p->prev = tail;
+ } else {
+ *_buffer = p;
+ }
+ tail = p;
}
- }
- rreq->buffer_tail_slot = 0;
- return tail;
-}
-/*
- * Append a folio to the rolling queue.
- */
-int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
- bool needs_put)
-{
- struct folio_queue *tail;
- unsigned int slot, order = folio_order(folio);
+ if (size - *_cur_size > PAGE_SIZE)
+ order = umin(ilog2(size - *_cur_size) - PAGE_SHIFT,
+ MAX_PAGECACHE_ORDER);
- tail = netfs_buffer_make_space(rreq);
- if (IS_ERR(tail))
- return PTR_ERR(tail);
+ folio = folio_alloc(gfp, order);
+ if (!folio && order > 0)
+ folio = folio_alloc(gfp, 0);
+ if (!folio)
+ return -ENOMEM;
- rreq->io_iter.count += PAGE_SIZE << order;
+ folio->mapping = mapping;
+ folio->index = *_cur_size / PAGE_SIZE;
+ trace_netfs_folio(folio, netfs_folio_trace_alloc_buffer);
+ slot = folioq_append_mark(tail, folio);
+ *_cur_size += folioq_folio_size(tail, slot);
+ } while (*_cur_size < size);
- slot = folioq_append(tail, folio);
- /* Store the counter after setting the slot. */
- smp_store_release(&rreq->buffer_tail_slot, slot);
return 0;
}
+EXPORT_SYMBOL(netfs_alloc_folioq_buffer);
-/*
- * Delete the head of a rolling queue.
+/**
+ * netfs_free_folioq_buffer - Free a folio queue.
+ * @fq: The start of the folio queue to free
+ *
+ * Free up a chain of folio_queues and, if marked, the marked folios they point
+ * to.
*/
-struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq)
+void netfs_free_folioq_buffer(struct folio_queue *fq)
{
- struct folio_queue *head = wreq->buffer, *next = head->next;
-
- if (next)
- next->prev = NULL;
- netfs_stat_d(&netfs_n_folioq);
- kfree(head);
- wreq->buffer = next;
- return next;
-}
+ struct folio_queue *next;
+ struct folio_batch fbatch;
-/*
- * Clear out a rolling queue.
- */
-void netfs_clear_buffer(struct netfs_io_request *rreq)
-{
- struct folio_queue *p;
+ folio_batch_init(&fbatch);
+
+ for (; fq; fq = next) {
+ for (int slot = 0; slot < folioq_count(fq); slot++) {
+ struct folio *folio = folioq_folio(fq, slot);
- while ((p = rreq->buffer)) {
- rreq->buffer = p->next;
- for (int slot = 0; slot < folioq_count(p); slot++) {
- struct folio *folio = folioq_folio(p, slot);
- if (!folio)
+ if (!folio ||
+ !folioq_is_marked(fq, slot))
continue;
- if (folioq_is_marked(p, slot)) {
- trace_netfs_folio(folio, netfs_folio_trace_put);
- folio_put(folio);
- }
+
+ trace_netfs_folio(folio, netfs_folio_trace_put);
+ if (folio_batch_add(&fbatch, folio))
+ folio_batch_release(&fbatch);
}
+
netfs_stat_d(&netfs_n_folioq);
- kfree(p);
+ next = fq->next;
+ kfree(fq);
}
+
+ folio_batch_release(&fbatch);
}
+EXPORT_SYMBOL(netfs_free_folioq_buffer);
/*
* Reset the subrequest iterator to refer just to the region remaining to be
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 31e388ec6e48..dc6b41ef18b0 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -48,17 +48,20 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
spin_lock_init(&rreq->lock);
INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
- INIT_LIST_HEAD(&rreq->subrequests);
+ init_waitqueue_head(&rreq->waitq);
refcount_set(&rreq->ref, 1);
if (origin == NETFS_READAHEAD ||
origin == NETFS_READPAGE ||
origin == NETFS_READ_GAPS ||
+ origin == NETFS_READ_SINGLE ||
origin == NETFS_READ_FOR_WRITE ||
- origin == NETFS_DIO_READ)
- INIT_WORK(&rreq->work, netfs_read_termination_worker);
- else
+ origin == NETFS_DIO_READ) {
+ INIT_WORK(&rreq->work, netfs_read_collection_worker);
+ rreq->io_streams[0].avail = true;
+ } else {
INIT_WORK(&rreq->work, netfs_write_collection_worker);
+ }
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
if (file && file->f_flags & O_NONBLOCK)
@@ -92,14 +95,6 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
struct netfs_io_stream *stream;
int s;
- while (!list_empty(&rreq->subrequests)) {
- subreq = list_first_entry(&rreq->subrequests,
- struct netfs_io_subrequest, rreq_link);
- list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, was_async,
- netfs_sreq_trace_put_clear);
- }
-
for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) {
stream = &rreq->io_streams[s];
while (!list_empty(&stream->subrequests)) {
@@ -143,7 +138,7 @@ static void netfs_free_request(struct work_struct *work)
}
kvfree(rreq->direct_bv);
}
- netfs_clear_buffer(rreq);
+ rolling_buffer_clear(&rreq->buffer);
if (atomic_dec_and_test(&ictx->io_count))
wake_up_var(&ictx->io_count);
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index e8624f5c7fcc..f65affa5a9e4 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -14,6 +14,14 @@
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
+/* Notes made in the collector */
+#define HIT_PENDING 0x01 /* A front op was still pending */
+#define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */
+#define BUFFERED 0x08 /* The pagecache needs cleaning up */
+#define NEED_RETRY 0x10 /* A front op requests retrying */
+#define COPY_TO_CACHE 0x40 /* Need to copy subrequest to cache */
+#define ABANDON_SREQ 0x80 /* Need to abandon untransferred part of subrequest */
+
/*
* Clear the unread part of an I/O request.
*/
@@ -31,14 +39,18 @@ static void netfs_clear_unread(struct netfs_io_subrequest *subreq)
* cache the folio, we set the group to NETFS_FOLIO_COPY_TO_CACHE, mark it
* dirty and let writeback handle it.
*/
-static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
- struct netfs_io_request *rreq,
+static void netfs_unlock_read_folio(struct netfs_io_request *rreq,
struct folio_queue *folioq,
int slot)
{
struct netfs_folio *finfo;
struct folio *folio = folioq_folio(folioq, slot);
+ if (unlikely(folio_pos(folio) < rreq->abandon_to)) {
+ trace_netfs_folio(folio, netfs_folio_trace_abandon);
+ goto just_unlock;
+ }
+
flush_dcache_folio(folio);
folio_mark_uptodate(folio);
@@ -53,7 +65,7 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
kfree(finfo);
}
- if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+ if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags)) {
if (!WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
@@ -66,12 +78,11 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
folioq_clear(folioq, slot);
} else {
// TODO: Use of PG_private_2 is deprecated.
- if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
- netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
- else
- folioq_clear(folioq, slot);
+ if (test_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags))
+ netfs_pgpriv2_copy_to_cache(rreq, folio);
}
+just_unlock:
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
if (folio->index == rreq->no_unlock_folio &&
test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) {
@@ -81,238 +92,249 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
folio_unlock(folio);
}
}
+
+ folioq_clear(folioq, slot);
}
/*
- * Unlock any folios that are now completely read. Returns true if the
- * subrequest is removed from the list.
+ * Unlock any folios we've finished with.
*/
-static bool netfs_consume_read_data(struct netfs_io_subrequest *subreq, bool was_async)
+static void netfs_read_unlock_folios(struct netfs_io_request *rreq,
+ unsigned int *notes)
{
- struct netfs_io_subrequest *prev, *next;
- struct netfs_io_request *rreq = subreq->rreq;
- struct folio_queue *folioq = subreq->curr_folioq;
- size_t avail, prev_donated, next_donated, fsize, part, excess;
- loff_t fpos, start;
- loff_t fend;
- int slot = subreq->curr_folioq_slot;
-
- if (WARN(subreq->transferred > subreq->len,
- "Subreq overread: R%x[%x] %zu > %zu",
- rreq->debug_id, subreq->debug_index,
- subreq->transferred, subreq->len))
- subreq->transferred = subreq->len;
-
-next_folio:
- fsize = PAGE_SIZE << subreq->curr_folio_order;
- fpos = round_down(subreq->start + subreq->consumed, fsize);
- fend = fpos + fsize;
-
- if (WARN_ON_ONCE(!folioq) ||
- WARN_ON_ONCE(!folioq_folio(folioq, slot)) ||
- WARN_ON_ONCE(folioq_folio(folioq, slot)->index != fpos / PAGE_SIZE)) {
- pr_err("R=%08x[%x] s=%llx-%llx ctl=%zx/%zx/%zx sl=%u\n",
- rreq->debug_id, subreq->debug_index,
- subreq->start, subreq->start + subreq->transferred - 1,
- subreq->consumed, subreq->transferred, subreq->len,
- slot);
- if (folioq) {
- struct folio *folio = folioq_folio(folioq, slot);
-
- pr_err("folioq: orders=%02x%02x%02x%02x\n",
- folioq->orders[0], folioq->orders[1],
- folioq->orders[2], folioq->orders[3]);
- if (folio)
- pr_err("folio: %llx-%llx ix=%llx o=%u qo=%u\n",
- fpos, fend - 1, folio_pos(folio), folio_order(folio),
- folioq_folio_order(folioq, slot));
- }
- }
+ struct folio_queue *folioq = rreq->buffer.tail;
+ unsigned long long collected_to = rreq->collected_to;
+ unsigned int slot = rreq->buffer.first_tail_slot;
-donation_changed:
- /* Try to consume the current folio if we've hit or passed the end of
- * it. There's a possibility that this subreq doesn't start at the
- * beginning of the folio, in which case we need to donate to/from the
- * preceding subreq.
- *
- * We also need to include any potential donation back from the
- * following subreq.
- */
- prev_donated = READ_ONCE(subreq->prev_donated);
- next_donated = READ_ONCE(subreq->next_donated);
- if (prev_donated || next_donated) {
- spin_lock_bh(&rreq->lock);
- prev_donated = subreq->prev_donated;
- next_donated = subreq->next_donated;
- subreq->start -= prev_donated;
- subreq->len += prev_donated;
- subreq->transferred += prev_donated;
- prev_donated = subreq->prev_donated = 0;
- if (subreq->transferred == subreq->len) {
- subreq->len += next_donated;
- subreq->transferred += next_donated;
- next_donated = subreq->next_donated = 0;
+ if (rreq->cleaned_to >= rreq->collected_to)
+ return;
+
+ // TODO: Begin decryption
+
+ if (slot >= folioq_nr_slots(folioq)) {
+ folioq = rolling_buffer_delete_spent(&rreq->buffer);
+ if (!folioq) {
+ rreq->front_folio_order = 0;
+ return;
}
- trace_netfs_sreq(subreq, netfs_sreq_trace_add_donations);
- spin_unlock_bh(&rreq->lock);
+ slot = 0;
}
- avail = subreq->transferred;
- if (avail == subreq->len)
- avail += next_donated;
- start = subreq->start;
- if (subreq->consumed == 0) {
- start -= prev_donated;
- avail += prev_donated;
- } else {
- start += subreq->consumed;
- avail -= subreq->consumed;
- }
- part = umin(avail, fsize);
-
- trace_netfs_progress(subreq, start, avail, part);
-
- if (start + avail >= fend) {
- if (fpos == start) {
- /* Flush, unlock and mark for caching any folio we've just read. */
- subreq->consumed = fend - subreq->start;
- netfs_unlock_read_folio(subreq, rreq, folioq, slot);
- folioq_mark2(folioq, slot);
- if (subreq->consumed >= subreq->len)
- goto remove_subreq;
- } else if (fpos < start) {
- excess = fend - subreq->start;
-
- spin_lock_bh(&rreq->lock);
- /* If we complete first on a folio split with the
- * preceding subreq, donate to that subreq - otherwise
- * we get the responsibility.
- */
- if (subreq->prev_donated != prev_donated) {
- spin_unlock_bh(&rreq->lock);
- goto donation_changed;
- }
+ for (;;) {
+ struct folio *folio;
+ unsigned long long fpos, fend;
+ unsigned int order;
+ size_t fsize;
- if (list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
- spin_unlock_bh(&rreq->lock);
- pr_err("Can't donate prior to front\n");
- goto bad;
- }
+ if (*notes & COPY_TO_CACHE)
+ set_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
- prev = list_prev_entry(subreq, rreq_link);
- WRITE_ONCE(prev->next_donated, prev->next_donated + excess);
- subreq->start += excess;
- subreq->len -= excess;
- subreq->transferred -= excess;
- trace_netfs_donate(rreq, subreq, prev, excess,
- netfs_trace_donate_tail_to_prev);
- trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
-
- if (subreq->consumed >= subreq->len)
- goto remove_subreq_locked;
- spin_unlock_bh(&rreq->lock);
- } else {
- pr_err("fpos > start\n");
- goto bad;
- }
+ folio = folioq_folio(folioq, slot);
+ if (WARN_ONCE(!folio_test_locked(folio),
+ "R=%08x: folio %lx is not locked\n",
+ rreq->debug_id, folio->index))
+ trace_netfs_folio(folio, netfs_folio_trace_not_locked);
+
+ order = folioq_folio_order(folioq, slot);
+ rreq->front_folio_order = order;
+ fsize = PAGE_SIZE << order;
+ fpos = folio_pos(folio);
+ fend = umin(fpos + fsize, rreq->i_size);
+
+ trace_netfs_collect_folio(rreq, folio, fend, collected_to);
- /* Advance the rolling buffer to the next folio. */
+ /* Unlock any folio we've transferred all of. */
+ if (collected_to < fend)
+ break;
+
+ netfs_unlock_read_folio(rreq, folioq, slot);
+ WRITE_ONCE(rreq->cleaned_to, fpos + fsize);
+ *notes |= MADE_PROGRESS;
+
+ clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
+
+ /* Clean up the head folioq. If we clear an entire folioq, then
+ * we can get rid of it provided it's not also the tail folioq
+ * being filled by the issuer.
+ */
+ folioq_clear(folioq, slot);
slot++;
if (slot >= folioq_nr_slots(folioq)) {
+ folioq = rolling_buffer_delete_spent(&rreq->buffer);
+ if (!folioq)
+ goto done;
slot = 0;
- folioq = folioq->next;
- subreq->curr_folioq = folioq;
+ trace_netfs_folioq(folioq, netfs_trace_folioq_read_progress);
}
- subreq->curr_folioq_slot = slot;
- if (folioq && folioq_folio(folioq, slot))
- subreq->curr_folio_order = folioq->orders[slot];
- if (!was_async)
- cond_resched();
- goto next_folio;
+
+ if (fpos + fsize >= collected_to)
+ break;
}
- /* Deal with partial progress. */
- if (subreq->transferred < subreq->len)
- return false;
+ rreq->buffer.tail = folioq;
+done:
+ rreq->buffer.first_tail_slot = slot;
+}
- /* Donate the remaining downloaded data to one of the neighbouring
- * subrequests. Note that we may race with them doing the same thing.
+/*
+ * Collect and assess the results of various read subrequests. We may need to
+ * retry some of the results.
+ *
+ * Note that we have a sequence of subrequests, which may be drawing on
+ * different sources and may or may not be the same size or starting position
+ * and may not even correspond in boundary alignment.
+ */
+static void netfs_collect_read_results(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *front, *remove;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ unsigned int notes;
+
+ _enter("%llx-%llx", rreq->start, rreq->start + rreq->len);
+ trace_netfs_rreq(rreq, netfs_rreq_trace_collect);
+ trace_netfs_collect(rreq);
+
+reassess:
+ if (rreq->origin == NETFS_READAHEAD ||
+ rreq->origin == NETFS_READPAGE ||
+ rreq->origin == NETFS_READ_FOR_WRITE)
+ notes = BUFFERED;
+ else
+ notes = 0;
+
+ /* Remove completed subrequests from the front of the stream and
+ * advance the completion point. We stop when we hit something that's
+ * in progress. The issuer thread may be adding stuff to the tail
+ * whilst we're doing this.
*/
- spin_lock_bh(&rreq->lock);
+ front = READ_ONCE(stream->front);
+ while (front) {
+ size_t transferred;
- if (subreq->prev_donated != prev_donated ||
- subreq->next_donated != next_donated) {
- spin_unlock_bh(&rreq->lock);
- cond_resched();
- goto donation_changed;
- }
+ trace_netfs_collect_sreq(rreq, front);
+ _debug("sreq [%x] %llx %zx/%zx",
+ front->debug_index, front->start, front->transferred, front->len);
- /* Deal with the trickiest case: that this subreq is in the middle of a
- * folio, not touching either edge, but finishes first. In such a
- * case, we donate to the previous subreq, if there is one and if it is
- * contiguous, so that the donation is only handled when that completes
- * - and remove this subreq from the list.
- *
- * If the previous subreq finished first, we will have acquired their
- * donation and should be able to unlock folios and/or donate nextwards.
- */
- if (!subreq->consumed &&
- !prev_donated &&
- !list_is_first(&subreq->rreq_link, &rreq->subrequests) &&
- subreq->start == prev->start + prev->len) {
- prev = list_prev_entry(subreq, rreq_link);
- WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
- subreq->start += subreq->len;
- subreq->len = 0;
- subreq->transferred = 0;
- trace_netfs_donate(rreq, subreq, prev, subreq->len,
- netfs_trace_donate_to_prev);
- trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_prev);
- goto remove_subreq_locked;
+ if (stream->collected_to < front->start) {
+ trace_netfs_collect_gap(rreq, stream, front->start, 'F');
+ stream->collected_to = front->start;
+ }
+
+ if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
+ notes |= HIT_PENDING;
+ smp_rmb(); /* Read counters after IN_PROGRESS flag. */
+ transferred = READ_ONCE(front->transferred);
+
+ /* If we can now collect the next folio, do so. We don't want
+ * to defer this as we have to decide whether we need to copy
+ * to the cache or not, and that may differ between adjacent
+ * subreqs.
+ */
+ if (notes & BUFFERED) {
+ size_t fsize = PAGE_SIZE << rreq->front_folio_order;
+
+ /* Clear the tail of a short read. */
+ if (!(notes & HIT_PENDING) &&
+ front->error == 0 &&
+ transferred < front->len &&
+ (test_bit(NETFS_SREQ_HIT_EOF, &front->flags) ||
+ test_bit(NETFS_SREQ_CLEAR_TAIL, &front->flags))) {
+ netfs_clear_unread(front);
+ transferred = front->transferred = front->len;
+ trace_netfs_sreq(front, netfs_sreq_trace_clear);
+ }
+
+ stream->collected_to = front->start + transferred;
+ rreq->collected_to = stream->collected_to;
+
+ if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &front->flags))
+ notes |= COPY_TO_CACHE;
+
+ if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
+ rreq->abandon_to = front->start + front->len;
+ front->transferred = front->len;
+ transferred = front->len;
+ trace_netfs_rreq(rreq, netfs_rreq_trace_set_abandon);
+ }
+ if (front->start + transferred >= rreq->cleaned_to + fsize ||
+ test_bit(NETFS_SREQ_HIT_EOF, &front->flags))
+ netfs_read_unlock_folios(rreq, &notes);
+ } else {
+ stream->collected_to = front->start + transferred;
+ rreq->collected_to = stream->collected_to;
+ }
+
+ /* Stall if the front is still undergoing I/O. */
+ if (notes & HIT_PENDING)
+ break;
+
+ if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
+ if (!stream->failed) {
+ stream->error = front->error;
+ rreq->error = front->error;
+ set_bit(NETFS_RREQ_FAILED, &rreq->flags);
+ stream->failed = true;
+ }
+ notes |= MADE_PROGRESS | ABANDON_SREQ;
+ } else if (test_bit(NETFS_SREQ_NEED_RETRY, &front->flags)) {
+ stream->need_retry = true;
+ notes |= NEED_RETRY | MADE_PROGRESS;
+ break;
+ } else {
+ if (!stream->failed)
+ stream->transferred = stream->collected_to - rreq->start;
+ notes |= MADE_PROGRESS;
+ }
+
+ /* Remove if completely consumed. */
+ stream->source = front->source;
+ spin_lock(&rreq->lock);
+
+ remove = front;
+ trace_netfs_sreq(front, netfs_sreq_trace_discard);
+ list_del_init(&front->rreq_link);
+ front = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ stream->front = front;
+ spin_unlock(&rreq->lock);
+ netfs_put_subrequest(remove, false,
+ notes & ABANDON_SREQ ?
+ netfs_sreq_trace_put_abandon :
+ netfs_sreq_trace_put_done);
}
- /* If we can't donate down the chain, donate up the chain instead. */
- excess = subreq->len - subreq->consumed + next_donated;
+ trace_netfs_collect_stream(rreq, stream);
+ trace_netfs_collect_state(rreq, rreq->collected_to, notes);
- if (!subreq->consumed)
- excess += prev_donated;
+ if (!(notes & BUFFERED))
+ rreq->cleaned_to = rreq->collected_to;
- if (list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
- rreq->prev_donated = excess;
- trace_netfs_donate(rreq, subreq, NULL, excess,
- netfs_trace_donate_to_deferred_next);
- } else {
- next = list_next_entry(subreq, rreq_link);
- WRITE_ONCE(next->prev_donated, excess);
- trace_netfs_donate(rreq, subreq, next, excess,
- netfs_trace_donate_to_next);
+ if (notes & NEED_RETRY)
+ goto need_retry;
+ if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_unpause);
+ clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags);
+ smp_mb__after_atomic(); /* Clear PAUSE before task state */
+ wake_up(&rreq->waitq);
+ }
+
+ if (notes & MADE_PROGRESS) {
+ //cond_resched();
+ goto reassess;
}
- trace_netfs_sreq(subreq, netfs_sreq_trace_donate_to_next);
- subreq->len = subreq->consumed;
- subreq->transferred = subreq->consumed;
- goto remove_subreq_locked;
-
-remove_subreq:
- spin_lock_bh(&rreq->lock);
-remove_subreq_locked:
- subreq->consumed = subreq->len;
- list_del(&subreq->rreq_link);
- spin_unlock_bh(&rreq->lock);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_consumed);
- return true;
-
-bad:
- /* Errr... prev and next both donated to us, but insufficient to finish
- * the folio.
+
+out:
+ _leave(" = %x", notes);
+ return;
+
+need_retry:
+ /* Okay... We're going to have to retry parts of the stream. Note
+ * that any partially completed op will have had any wholly transferred
+ * folios removed from it.
*/
- printk("R=%08x[%x] s=%llx-%llx %zx/%zx/%zx\n",
- rreq->debug_id, subreq->debug_index,
- subreq->start, subreq->start + subreq->transferred - 1,
- subreq->consumed, subreq->transferred, subreq->len);
- printk("folio: %llx-%llx\n", fpos, fend - 1);
- printk("donated: prev=%zx next=%zx\n", prev_donated, next_donated);
- printk("s=%llx av=%zx part=%zx\n", start, avail, part);
- BUG();
+ _debug("retry");
+ netfs_retry_reads(rreq);
+ goto out;
}
/*
@@ -321,12 +343,13 @@ bad:
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned int i;
/* Collect unbuffered reads and direct reads, adding up the transfer
* sizes until we find the first short or failed subrequest.
*/
- list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
rreq->transferred += subreq->transferred;
if (subreq->transferred < subreq->len ||
@@ -359,25 +382,67 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
}
/*
- * Assess the state of a read request and decide what to do next.
+ * Do processing after reading a monolithic single object.
+ */
+static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
+{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+
+ if (!rreq->error && stream->source == NETFS_DOWNLOAD_FROM_SERVER &&
+ fscache_resources_valid(&rreq->cache_resources)) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_dirty);
+ netfs_single_mark_inode_dirty(rreq->inode);
+ }
+
+ if (rreq->iocb) {
+ rreq->iocb->ki_pos += rreq->transferred;
+ if (rreq->iocb->ki_complete)
+ rreq->iocb->ki_complete(
+ rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
+ }
+ if (rreq->netfs_ops->done)
+ rreq->netfs_ops->done(rreq);
+}
+
+/*
+ * Perform the collection of subrequests and folios.
*
* Note that we're in normal kernel thread context at this point, possibly
* running on a workqueue.
*/
-static void netfs_rreq_assess(struct netfs_io_request *rreq)
+static void netfs_read_collection(struct netfs_io_request *rreq)
{
- trace_netfs_rreq(rreq, netfs_rreq_trace_assess);
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
- //netfs_rreq_is_still_valid(rreq);
+ netfs_collect_read_results(rreq);
- if (test_and_clear_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags)) {
- netfs_retry_reads(rreq);
+ /* We're done when the app thread has finished posting subreqs and the
+ * queue is empty.
+ */
+ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
+ return;
+ smp_rmb(); /* Read ALL_QUEUED before subreq lists. */
+
+ if (!list_empty(&stream->subrequests))
return;
- }
- if (rreq->origin == NETFS_DIO_READ ||
- rreq->origin == NETFS_READ_GAPS)
+ /* Okay, declare that all I/O is complete. */
+ rreq->transferred = stream->transferred;
+ trace_netfs_rreq(rreq, netfs_rreq_trace_complete);
+
+ //netfs_rreq_is_still_valid(rreq);
+
+ switch (rreq->origin) {
+ case NETFS_DIO_READ:
+ case NETFS_READ_GAPS:
netfs_rreq_assess_dio(rreq);
+ break;
+ case NETFS_READ_SINGLE:
+ netfs_rreq_assess_single(rreq);
+ break;
+ default:
+ break;
+ }
task_io_account_read(rreq->transferred);
trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
@@ -386,61 +451,66 @@ static void netfs_rreq_assess(struct netfs_io_request *rreq)
trace_netfs_rreq(rreq, netfs_rreq_trace_done);
netfs_clear_subrequests(rreq, false);
netfs_unlock_abandoned_read_pages(rreq);
- if (unlikely(test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)))
- netfs_pgpriv2_write_to_the_cache(rreq);
+ if (unlikely(rreq->copy_to_cache))
+ netfs_pgpriv2_end_copy_to_cache(rreq);
}
-void netfs_read_termination_worker(struct work_struct *work)
+void netfs_read_collection_worker(struct work_struct *work)
{
- struct netfs_io_request *rreq =
- container_of(work, struct netfs_io_request, work);
+ struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
+
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- netfs_rreq_assess(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_work_complete);
+ if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ netfs_read_collection(rreq);
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_work);
}
/*
- * Handle the completion of all outstanding I/O operations on a read request.
- * We inherit a ref from the caller.
+ * Wake the collection work item.
*/
-void netfs_rreq_terminated(struct netfs_io_request *rreq, bool was_async)
+void netfs_wake_read_collector(struct netfs_io_request *rreq)
{
- if (!was_async)
- return netfs_rreq_assess(rreq);
- if (!work_pending(&rreq->work)) {
- netfs_get_request(rreq, netfs_rreq_trace_get_work);
- if (!queue_work(system_unbound_wq, &rreq->work))
- netfs_put_request(rreq, was_async, netfs_rreq_trace_put_work_nq);
+ if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ if (!work_pending(&rreq->work)) {
+ netfs_get_request(rreq, netfs_rreq_trace_get_work);
+ if (!queue_work(system_unbound_wq, &rreq->work))
+ netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq);
+ }
+ } else {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
+ wake_up(&rreq->waitq);
}
}
/**
* netfs_read_subreq_progress - Note progress of a read operation.
* @subreq: The read request that has terminated.
- * @was_async: True if we're in an asynchronous context.
*
* This tells the read side of netfs lib that a contributory I/O operation has
* made some progress and that it may be possible to unlock some folios.
*
* Before calling, the filesystem should update subreq->transferred to track
* the amount of data copied into the output buffer.
- *
- * If @was_async is true, the caller might be running in softirq or interrupt
- * context and we can't sleep.
*/
-void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
- bool was_async)
+void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ size_t fsize = PAGE_SIZE << rreq->front_folio_order;
trace_netfs_sreq(subreq, netfs_sreq_trace_progress);
- if (subreq->transferred > subreq->consumed &&
+ /* If we are at the head of the queue, wake up the collector,
+ * getting a ref to it if we were the ones to do so.
+ */
+ if (subreq->start + subreq->transferred > rreq->cleaned_to + fsize &&
(rreq->origin == NETFS_READAHEAD ||
rreq->origin == NETFS_READPAGE ||
- rreq->origin == NETFS_READ_FOR_WRITE)) {
- netfs_consume_read_data(subreq, was_async);
+ rreq->origin == NETFS_READ_FOR_WRITE) &&
+ list_is_first(&subreq->rreq_link, &stream->subrequests)
+ ) {
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ netfs_wake_read_collector(rreq);
}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
@@ -448,27 +518,23 @@ EXPORT_SYMBOL(netfs_read_subreq_progress);
/**
* netfs_read_subreq_terminated - Note the termination of an I/O operation.
* @subreq: The I/O request that has terminated.
- * @error: Error code indicating type of completion.
- * @was_async: The termination was asynchronous
*
* This tells the read helper that a contributory I/O operation has terminated,
* one way or another, and that it should integrate the results.
*
- * The caller indicates the outcome of the operation through @error, supplying
- * 0 to indicate a successful or retryable transfer (if NETFS_SREQ_NEED_RETRY
- * is set) or a negative error code. The helper will look after reissuing I/O
- * operations as appropriate and writing downloaded data to the cache.
+ * The caller indicates the outcome of the operation through @subreq->error,
+ * supplying 0 to indicate a successful or retryable transfer (if
+ * NETFS_SREQ_NEED_RETRY is set) or a negative error code. The helper will
+ * look after reissuing I/O operations as appropriate and writing downloaded
+ * data to the cache.
*
* Before calling, the filesystem should update subreq->transferred to track
* the amount of data copied into the output buffer.
- *
- * If @was_async is true, the caller might be running in softirq or interrupt
- * context and we can't sleep.
*/
-void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
- int error, bool was_async)
+void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
switch (subreq->source) {
case NETFS_READ_FROM_CACHE:
@@ -481,71 +547,156 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
break;
}
- if (rreq->origin != NETFS_DIO_READ) {
- /* Collect buffered reads.
- *
- * If the read completed validly short, then we can clear the
- * tail before going on to unlock the folios.
- */
- if (error == 0 && subreq->transferred < subreq->len &&
- (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags) ||
- test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags))) {
- netfs_clear_unread(subreq);
- subreq->transferred = subreq->len;
- trace_netfs_sreq(subreq, netfs_sreq_trace_clear);
- }
- if (subreq->transferred > subreq->consumed &&
- (rreq->origin == NETFS_READAHEAD ||
- rreq->origin == NETFS_READPAGE ||
- rreq->origin == NETFS_READ_FOR_WRITE)) {
- netfs_consume_read_data(subreq, was_async);
- __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- }
- rreq->transferred += subreq->transferred;
- }
-
/* Deal with retry requests, short reads and errors. If we retry
* but don't make progress, we abandon the attempt.
*/
- if (!error && subreq->transferred < subreq->len) {
+ if (!subreq->error && subreq->transferred < subreq->len) {
if (test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags)) {
trace_netfs_sreq(subreq, netfs_sreq_trace_hit_eof);
+ } else if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_need_clear);
+ } else if (test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_need_retry);
+ } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
+ __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_partial_read);
} else {
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ subreq->error = -ENODATA;
trace_netfs_sreq(subreq, netfs_sreq_trace_short);
- if (subreq->transferred > subreq->consumed) {
- /* If we didn't read new data, abandon retry. */
- if (subreq->retry_count &&
- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
- }
- } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
- } else {
- __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
- error = -ENODATA;
- }
}
}
- subreq->error = error;
- trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- if (unlikely(error < 0)) {
- trace_netfs_failure(rreq, subreq, error, netfs_fail_read);
+ if (unlikely(subreq->error < 0)) {
+ trace_netfs_failure(rreq, subreq, subreq->error, netfs_fail_read);
if (subreq->source == NETFS_READ_FROM_CACHE) {
netfs_stat(&netfs_n_rh_read_failed);
+ __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
} else {
netfs_stat(&netfs_n_rh_download_failed);
- set_bit(NETFS_RREQ_FAILED, &rreq->flags);
- rreq->error = subreq->error;
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
}
+ trace_netfs_rreq(rreq, netfs_rreq_trace_set_pause);
+ set_bit(NETFS_RREQ_PAUSE, &rreq->flags);
}
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq, was_async);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
+
+ clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
+
+ /* If we are at the head of the queue, wake up the collector. */
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests))
+ netfs_wake_read_collector(rreq);
- netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+ netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
+
+/*
+ * Handle termination of a read from the cache.
+ */
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async)
+{
+ struct netfs_io_subrequest *subreq = priv;
+
+ if (transferred_or_error > 0) {
+ subreq->error = 0;
+ if (transferred_or_error > 0) {
+ subreq->transferred += transferred_or_error;
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ }
+ } else {
+ subreq->error = transferred_or_error;
+ }
+ netfs_read_subreq_terminated(subreq);
+}
+
+/*
+ * Wait for the read operation to complete, successfully or otherwise.
+ */
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ DEFINE_WAIT(myself);
+ ssize_t ret;
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ subreq = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ if (subreq &&
+ (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
+ __set_current_state(TASK_RUNNING);
+ netfs_read_collection(rreq);
+ continue;
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+
+ ret = rreq->error;
+ if (ret == 0) {
+ ret = rreq->transferred;
+ switch (rreq->origin) {
+ case NETFS_DIO_READ:
+ case NETFS_READ_SINGLE:
+ ret = rreq->transferred;
+ break;
+ default:
+ if (rreq->submitted < rreq->len) {
+ trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
+ ret = -EIO;
+ }
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Wait for a paused read operation to unpause or complete in some manner.
+ */
+void netfs_wait_for_pause(struct netfs_io_request *rreq)
+{
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ DEFINE_WAIT(myself);
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ subreq = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ if (subreq &&
+ (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
+ __set_current_state(TASK_RUNNING);
+ netfs_read_collection(rreq);
+ continue;
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+}
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index 54d5004fec18..cf7727060215 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -14,52 +14,11 @@
#include "internal.h"
/*
- * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2. The
- * third mark in the folio queue is used to indicate that this folio needs
- * writing.
- */
-void netfs_pgpriv2_mark_copy_to_cache(struct netfs_io_subrequest *subreq,
- struct netfs_io_request *rreq,
- struct folio_queue *folioq,
- int slot)
-{
- struct folio *folio = folioq_folio(folioq, slot);
-
- trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
- folio_start_private_2(folio);
- folioq_mark3(folioq, slot);
-}
-
-/*
- * [DEPRECATED] Cancel PG_private_2 on all marked folios in the event of an
- * unrecoverable error.
- */
-static void netfs_pgpriv2_cancel(struct folio_queue *folioq)
-{
- struct folio *folio;
- int slot;
-
- while (folioq) {
- if (!folioq->marks3) {
- folioq = folioq->next;
- continue;
- }
-
- slot = __ffs(folioq->marks3);
- folio = folioq_folio(folioq, slot);
-
- trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
- folio_end_private_2(folio);
- folioq_unmark3(folioq, slot);
- }
-}
-
-/*
* [DEPRECATED] Copy a folio to the cache with PG_private_2 set.
*/
-static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio *folio)
+static void netfs_pgpriv2_copy_folio(struct netfs_io_request *creq, struct folio *folio)
{
- struct netfs_io_stream *cache = &wreq->io_streams[1];
+ struct netfs_io_stream *cache = &creq->io_streams[1];
size_t fsize = folio_size(folio), flen = fsize;
loff_t fpos = folio_pos(folio), i_size;
bool to_eof = false;
@@ -70,17 +29,17 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
* of the page to beyond it, but cannot move i_size into or through the
* page since we have it locked.
*/
- i_size = i_size_read(wreq->inode);
+ i_size = i_size_read(creq->inode);
if (fpos >= i_size) {
/* mmap beyond eof. */
_debug("beyond eof");
folio_end_private_2(folio);
- return 0;
+ return;
}
- if (fpos + fsize > wreq->i_size)
- wreq->i_size = i_size;
+ if (fpos + fsize > creq->i_size)
+ creq->i_size = i_size;
if (flen > i_size - fpos) {
flen = i_size - fpos;
@@ -94,8 +53,10 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
trace_netfs_folio(folio, netfs_folio_trace_store_copy);
/* Attach the folio to the rolling buffer. */
- if (netfs_buffer_append_folio(wreq, folio, false) < 0)
- return -ENOMEM;
+ if (rolling_buffer_append(&creq->buffer, folio, 0) < 0) {
+ clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &creq->flags);
+ return;
+ }
cache->submit_extendable_to = fsize;
cache->submit_off = 0;
@@ -109,11 +70,11 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
do {
ssize_t part;
- wreq->io_iter.iov_offset = cache->submit_off;
+ creq->buffer.iter.iov_offset = cache->submit_off;
- atomic64_set(&wreq->issued_to, fpos + cache->submit_off);
+ atomic64_set(&creq->issued_to, fpos + cache->submit_off);
cache->submit_extendable_to = fsize - cache->submit_off;
- part = netfs_advance_write(wreq, cache, fpos + cache->submit_off,
+ part = netfs_advance_write(creq, cache, fpos + cache->submit_off,
cache->submit_len, to_eof);
cache->submit_off += part;
if (part > cache->submit_len)
@@ -122,98 +83,95 @@ static int netfs_pgpriv2_copy_folio(struct netfs_io_request *wreq, struct folio
cache->submit_len -= part;
} while (cache->submit_len > 0);
- wreq->io_iter.iov_offset = 0;
- iov_iter_advance(&wreq->io_iter, fsize);
- atomic64_set(&wreq->issued_to, fpos + fsize);
+ creq->buffer.iter.iov_offset = 0;
+ rolling_buffer_advance(&creq->buffer, fsize);
+ atomic64_set(&creq->issued_to, fpos + fsize);
if (flen < fsize)
- netfs_issue_write(wreq, cache);
-
- _leave(" = 0");
- return 0;
+ netfs_issue_write(creq, cache);
}
/*
- * [DEPRECATED] Go through the buffer and write any folios that are marked with
- * the third mark to the cache.
+ * [DEPRECATED] Set up copying to the cache.
+ *
+ * Create the write request used to copy folios read by @rreq to the cache,
+ * starting at the file position of @folio, and record it in
+ * rreq->copy_to_cache.  If the cache resources aren't valid, the write
+ * request can't be created or its stream 1 isn't marked available, then
+ * rreq->copy_to_cache is set to ERR_PTR(-ENOBUFS),
+ * NETFS_RREQ_FOLIO_COPY_TO_CACHE is cleared on @rreq and the same error
+ * pointer is returned.
*/
-void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
+static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
+ struct netfs_io_request *rreq, struct folio *folio)
{
- struct netfs_io_request *wreq;
- struct folio_queue *folioq;
- struct folio *folio;
- int error = 0;
- int slot = 0;
-
- _enter("");
+ struct netfs_io_request *creq;
if (!fscache_resources_valid(&rreq->cache_resources))
- goto couldnt_start;
+ goto cancel;
- /* Need the first folio to be able to set up the op. */
- for (folioq = rreq->buffer; folioq; folioq = folioq->next) {
- if (folioq->marks3) {
- slot = __ffs(folioq->marks3);
- break;
- }
- }
- if (!folioq)
- return;
- folio = folioq_folio(folioq, slot);
-
- wreq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio),
+ creq = netfs_create_write_req(rreq->mapping, NULL, folio_pos(folio),
NETFS_PGPRIV2_COPY_TO_CACHE);
- if (IS_ERR(wreq)) {
- kleave(" [create %ld]", PTR_ERR(wreq));
- goto couldnt_start;
- }
+ if (IS_ERR(creq))
+ goto cancel;
- trace_netfs_write(wreq, netfs_write_trace_copy_to_cache);
+ if (!creq->io_streams[1].avail)
+ goto cancel_put;
+
+ trace_netfs_write(creq, netfs_write_trace_copy_to_cache);
netfs_stat(&netfs_n_wh_copy_to_cache);
- if (!wreq->io_streams[1].avail) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- goto couldnt_start;
- }
+ rreq->copy_to_cache = creq;
+ return creq;
+
+cancel_put:
+ netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+cancel:
+ rreq->copy_to_cache = ERR_PTR(-ENOBUFS); /* Remember the failure in the read request */
+ clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
+ return ERR_PTR(-ENOBUFS);
+}
- for (;;) {
- error = netfs_pgpriv2_copy_folio(wreq, folio);
- if (error < 0)
- break;
+/*
+ * [DEPRECATED] Mark page as requiring copy-to-cache using PG_private_2 and add
+ * it to the copy write request.
+ *
+ * The copy write request is taken from rreq->copy_to_cache, being set up on
+ * first use if that is still NULL.  If an error pointer is found or returned
+ * instead, the folio is left untouched.
+ */
+void netfs_pgpriv2_copy_to_cache(struct netfs_io_request *rreq, struct folio *folio)
+{
+ struct netfs_io_request *creq = rreq->copy_to_cache;
- folioq_unmark3(folioq, slot);
- if (!folioq->marks3) {
- folioq = folioq->next;
- if (!folioq)
- break;
- }
+ if (!creq)
+ creq = netfs_pgpriv2_begin_copy_to_cache(rreq, folio);
+ if (IS_ERR(creq))
+ return;
- slot = __ffs(folioq->marks3);
- folio = folioq_folio(folioq, slot);
- }
+ trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+ folio_start_private_2(folio);
+ netfs_pgpriv2_copy_folio(creq, folio);
+}
- netfs_issue_write(wreq, &wreq->io_streams[1]);
+/*
+ * [DEPRECATED] End writing to the cache, flushing out any outstanding writes.
+ *
+ * If a copy-to-cache request was set up for @rreq, issue whatever is still
+ * pending on its stream 1, set NETFS_RREQ_ALL_QUEUED on it and put it with
+ * netfs_put_request().  Nothing is done if rreq->copy_to_cache is NULL or an
+ * error pointer.  rreq->copy_to_cache is reset to NULL afterwards so that
+ * the read request doesn't keep a stale pointer.
+ */
+void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq)
+{
+ struct netfs_io_request *creq = rreq->copy_to_cache;
+
+ if (IS_ERR_OR_NULL(creq))
+ return;
+
+ netfs_issue_write(creq, &creq->io_streams[1]);
smp_wmb(); /* Write lists before ALL_QUEUED. */
- set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+ set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- _leave(" = %d", error);
-couldnt_start:
- netfs_pgpriv2_cancel(rreq->buffer);
+ netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+ /* The pointer being retired lives in the read request; creq must not be
+ * touched again after the put above.
+ */
+ rreq->copy_to_cache = NULL;
}
/*
* [DEPRECATED] Remove the PG_private_2 mark from any folios we've finished
* copying.
*/
-bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
+bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *creq)
{
- struct folio_queue *folioq = wreq->buffer;
- unsigned long long collected_to = wreq->collected_to;
- unsigned int slot = wreq->buffer_head_slot;
+ struct folio_queue *folioq = creq->buffer.tail;
+ unsigned long long collected_to = creq->collected_to;
+ unsigned int slot = creq->buffer.first_tail_slot;
bool made_progress = false;
if (slot >= folioq_nr_slots(folioq)) {
- folioq = netfs_delete_buffer_head(wreq);
+ folioq = rolling_buffer_delete_spent(&creq->buffer);
slot = 0;
}
@@ -225,16 +183,16 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
folio = folioq_folio(folioq, slot);
if (WARN_ONCE(!folio_test_private_2(folio),
"R=%08x: folio %lx is not marked private_2\n",
- wreq->debug_id, folio->index))
+ creq->debug_id, folio->index))
trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
fpos = folio_pos(folio);
fsize = folio_size(folio);
flen = fsize;
- fend = min_t(unsigned long long, fpos + flen, wreq->i_size);
+ fend = min_t(unsigned long long, fpos + flen, creq->i_size);
- trace_netfs_collect_folio(wreq, folio, fend, collected_to);
+ trace_netfs_collect_folio(creq, folio, fend, collected_to);
/* Unlock any folio we've transferred all of. */
if (collected_to < fend)
@@ -242,7 +200,7 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
trace_netfs_folio(folio, netfs_folio_trace_end_copy);
folio_end_private_2(folio);
- wreq->cleaned_to = fpos + fsize;
+ creq->cleaned_to = fpos + fsize;
made_progress = true;
/* Clean up the head folioq. If we clear an entire folioq, then
@@ -252,9 +210,9 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
folioq_clear(folioq, slot);
slot++;
if (slot >= folioq_nr_slots(folioq)) {
- if (READ_ONCE(wreq->buffer_tail) == folioq)
- break;
- folioq = netfs_delete_buffer_head(wreq);
+ folioq = rolling_buffer_delete_spent(&creq->buffer);
+ if (!folioq)
+ goto done;
slot = 0;
}
@@ -262,7 +220,8 @@ bool netfs_pgpriv2_unlock_copied_folios(struct netfs_io_request *wreq)
break;
}
- wreq->buffer = folioq;
- wreq->buffer_head_slot = slot;
+ creq->buffer.tail = folioq;
+done:
+ creq->buffer.first_tail_slot = slot;
return made_progress;
}
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 21b4a54e545e..bf6f26525b0d 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -12,15 +12,7 @@
static void netfs_reissue_read(struct netfs_io_request *rreq,
struct netfs_io_subrequest *subreq)
{
- struct iov_iter *io_iter = &subreq->io_iter;
-
- if (iov_iter_is_folioq(io_iter)) {
- subreq->curr_folioq = (struct folio_queue *)io_iter->folioq;
- subreq->curr_folioq_slot = io_iter->folioq_slot;
- subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
- }
-
- atomic_inc(&rreq->nr_outstanding);
+ __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
subreq->rreq->netfs_ops->issue_read(subreq);
@@ -33,13 +25,12 @@ static void netfs_reissue_read(struct netfs_io_request *rreq,
static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream0 = &rreq->io_streams[0];
- LIST_HEAD(sublist);
- LIST_HEAD(queue);
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ struct list_head *next;
_enter("R=%x", rreq->debug_id);
- if (list_empty(&rreq->subrequests))
+ if (list_empty(&stream->subrequests))
return;
if (rreq->netfs_ops->retry_request)
@@ -50,9 +41,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
*/
if (!rreq->netfs_ops->prepare_read &&
!rreq->cache_resources.ops) {
- struct netfs_io_subrequest *subreq;
-
- list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
break;
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
@@ -75,48 +64,44 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
* populating with smaller subrequests. In the event that the subreq
* we just launched finishes before we insert the next subreq, it'll
* fill in rreq->prev_donated instead.
-
+ *
* Note: Alternatively, we could split the tail subrequest right before
* we reissue it and fix up the donations under lock.
*/
- list_splice_init(&rreq->subrequests, &queue);
+ next = stream->subrequests.next;
do {
- struct netfs_io_subrequest *from;
+ struct netfs_io_subrequest *from, *to, *tmp;
struct iov_iter source;
unsigned long long start, len;
- size_t part, deferred_next_donated = 0;
+ size_t part;
bool boundary = false;
/* Go through the subreqs and find the next span of contiguous
* buffer that we then rejig (cifs, for example, needs the
* rsize renegotiating) and reissue.
*/
- from = list_first_entry(&queue, struct netfs_io_subrequest, rreq_link);
- list_move_tail(&from->rreq_link, &sublist);
+ from = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ to = from;
start = from->start + from->transferred;
len = from->len - from->transferred;
- _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx/%zx",
+ _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx",
rreq->debug_id, from->debug_index,
- from->start, from->consumed, from->transferred, from->len);
+ from->start, from->transferred, from->len);
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
!test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
goto abandon;
- deferred_next_donated = from->next_donated;
- while ((subreq = list_first_entry_or_null(
- &queue, struct netfs_io_subrequest, rreq_link))) {
- if (subreq->start != start + len ||
- subreq->transferred > 0 ||
+ list_for_each_continue(next, &stream->subrequests) {
+ subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ if (subreq->start + subreq->transferred != start + len ||
+ test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
!test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
break;
- list_move_tail(&subreq->rreq_link, &sublist);
- len += subreq->len;
- deferred_next_donated = subreq->next_donated;
- if (test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags))
- break;
+ to = subreq;
+ len += to->len;
}
_debug(" - range: %llx-%llx %llx", start, start + len - 1, len);
@@ -129,37 +114,30 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
source.count = len;
/* Work through the sublist. */
- while ((subreq = list_first_entry_or_null(
- &sublist, struct netfs_io_subrequest, rreq_link))) {
- list_del(&subreq->rreq_link);
-
+ subreq = from;
+ list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
+ if (!len)
+ break;
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->start = start - subreq->transferred;
subreq->len = len + subreq->transferred;
- stream0->sreq_max_len = subreq->len;
-
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
__clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
subreq->retry_count++;
- spin_lock_bh(&rreq->lock);
- list_add_tail(&subreq->rreq_link, &rreq->subrequests);
- subreq->prev_donated += rreq->prev_donated;
- rreq->prev_donated = 0;
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
- spin_unlock_bh(&rreq->lock);
-
- BUG_ON(!len);
/* Renegotiate max_len (rsize) */
+ stream->sreq_max_len = subreq->len;
if (rreq->netfs_ops->prepare_read(subreq) < 0) {
trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
__set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ goto abandon;
}
- part = umin(len, stream0->sreq_max_len);
- if (unlikely(rreq->io_streams[0].sreq_max_segs))
- part = netfs_limit_iter(&source, 0, part, stream0->sreq_max_segs);
+ part = umin(len, stream->sreq_max_len);
+ if (unlikely(stream->sreq_max_segs))
+ part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
subreq->len = subreq->transferred + part;
subreq->io_iter = source;
iov_iter_truncate(&subreq->io_iter, part);
@@ -169,57 +147,105 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
if (!len) {
if (boundary)
__set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
- subreq->next_donated = deferred_next_donated;
} else {
__clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags);
- subreq->next_donated = 0;
}
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_read(rreq, subreq);
- if (!len)
+ if (subreq == to)
break;
-
- /* If we ran out of subrequests, allocate another. */
- if (list_empty(&sublist)) {
- subreq = netfs_alloc_subrequest(rreq);
- if (!subreq)
- goto abandon;
- subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
- subreq->start = start;
-
- /* We get two refs, but need just one. */
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_new);
- trace_netfs_sreq(subreq, netfs_sreq_trace_split);
- list_add_tail(&subreq->rreq_link, &sublist);
- }
}
/* If we managed to use fewer subreqs, we can discard the
- * excess.
+ * excess; if we used the same number, then we're done.
*/
- while ((subreq = list_first_entry_or_null(
- &sublist, struct netfs_io_subrequest, rreq_link))) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
- list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ if (!len) {
+ if (subreq == to)
+ continue;
+ list_for_each_entry_safe_from(subreq, tmp,
+ &stream->subrequests, rreq_link) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
+ list_del(&subreq->rreq_link);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ if (subreq == to)
+ break;
+ }
+ continue;
}
- } while (!list_empty(&queue));
+ /* We ran out of subrequests, so we need to allocate some more
+ * and insert them after.
+ */
+ do {
+ subreq = netfs_alloc_subrequest(rreq);
+ if (!subreq) {
+ subreq = to;
+ goto abandon_after;
+ }
+ subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
+ subreq->start = start;
+ subreq->len = len;
+ subreq->debug_index = atomic_inc_return(&rreq->subreq_counter);
+ subreq->stream_nr = stream->stream_nr;
+ subreq->retry_count = 1;
+
+ trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index,
+ refcount_read(&subreq->ref),
+ netfs_sreq_trace_new);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+
+ list_add(&subreq->rreq_link, &to->rreq_link);
+ to = list_next_entry(to, rreq_link);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+
+ stream->sreq_max_len = umin(len, rreq->rsize);
+ stream->sreq_max_segs = 0;
+ if (unlikely(stream->sreq_max_segs))
+ part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
+
+ netfs_stat(&netfs_n_rh_download);
+ if (rreq->netfs_ops->prepare_read(subreq) < 0) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed);
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ goto abandon;
+ }
+
+ part = umin(len, stream->sreq_max_len);
+ subreq->len = subreq->transferred + part;
+ subreq->io_iter = source;
+ iov_iter_truncate(&subreq->io_iter, part);
+ iov_iter_advance(&source, part);
+
+ len -= part;
+ start += part;
+ if (!len && boundary) {
+ __set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
+ boundary = false;
+ }
+
+ netfs_reissue_read(rreq, subreq);
+ } while (len);
+
+ } while (!list_is_head(next, &stream->subrequests));
return;
- /* If we hit ENOMEM, fail all remaining subrequests */
+ /* If we hit an error, fail all remaining incomplete subrequests */
+abandon_after:
+ if (list_is_last(&subreq->rreq_link, &stream->subrequests))
+ return;
+ subreq = list_next_entry(subreq, rreq_link);
abandon:
- list_splice_init(&sublist, &queue);
- list_for_each_entry(subreq, &queue, rreq_link) {
- if (!subreq->error)
- subreq->error = -ENOMEM;
- __clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
+ if (!subreq->error &&
+ !test_bit(NETFS_SREQ_FAILED, &subreq->flags) &&
+ !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
+ continue;
+ subreq->error = -ENOMEM;
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
}
- spin_lock_bh(&rreq->lock);
- list_splice_tail_init(&queue, &rreq->subrequests);
- spin_unlock_bh(&rreq->lock);
}
/*
@@ -227,14 +253,19 @@ abandon:
*/
void netfs_retry_reads(struct netfs_io_request *rreq)
{
- trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[0]; /* Subrequests to retry are on stream 0 */
- atomic_inc(&rreq->nr_outstanding);
+ /* Wait for all outstanding I/O to quiesce before performing retries as
+ * we may need to renegotiate the I/O sizes.  Each subrequest on the
+ * stream is waited upon in turn until its IN_PROGRESS flag is clear.
+ */
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE);
+ }
+ trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
netfs_retry_read_subrequests(rreq);
-
- if (atomic_dec_and_test(&rreq->nr_outstanding))
- netfs_rreq_terminated(rreq, false);
}
/*
@@ -245,7 +276,7 @@ void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq)
{
struct folio_queue *p;
- for (p = rreq->buffer; p; p = p->next) {
+ for (p = rreq->buffer.tail; p; p = p->next) {
for (int slot = 0; slot < folioq_count(p); slot++) {
struct folio *folio = folioq_folio(p, slot);
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
new file mode 100644
index 000000000000..fea0ecdecc53
--- /dev/null
+++ b/fs/netfs/read_single.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Single, monolithic object support (e.g. AFS directory).
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/sched/mm.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+/**
+ * netfs_single_mark_inode_dirty - Mark a single, monolithic object inode dirty
+ * @inode: The inode to mark
+ *
+ * Flag the inode of a single, monolithic object as dirty so that its
+ * writepages op gets invoked.  The SINGLE_NO_UPLOAD flag, if set, indicates
+ * that the object is only written to the cache and never uploaded (e.g. AFS
+ * directory contents); in that case nothing is done unless the cache cookie
+ * is enabled.
+ *
+ * If caching is enabled and the inode isn't yet flagged I_PINNING_NETFS_WB,
+ * the flag is set under i_lock and the cookie is marked as being in use.
+ */
+void netfs_single_mark_inode_dirty(struct inode *inode)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+	bool no_upload = test_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &ictx->flags);
+	bool cache_on = fscache_cookie_enabled(netfs_i_cookie(ictx));
+	bool pinned_here;
+
+	/* A cache-only object with no cache has nowhere to be written. */
+	if (!cache_on && no_upload)
+		return;
+
+	mark_inode_dirty(inode);
+
+	/* Unlocked peek so that the lock is only taken if pinning looks to be
+	 * needed; the flag is rechecked under the lock.
+	 */
+	if (!cache_on || (inode->i_state & I_PINNING_NETFS_WB))
+		return;
+
+	spin_lock(&inode->i_lock);
+	pinned_here = !(inode->i_state & I_PINNING_NETFS_WB);
+	if (pinned_here)
+		inode->i_state |= I_PINNING_NETFS_WB;
+	spin_unlock(&inode->i_lock);
+
+	if (pinned_here)
+		fscache_use_cookie(netfs_i_cookie(ictx), true);
+}
+EXPORT_SYMBOL(netfs_single_mark_inode_dirty);
+
+/*
+ * Begin a cache read operation for the request using the cookie attached to
+ * the netfs inode context, passing back whatever the cache returns.
+ */
+static int netfs_single_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
+{
+	int ret;
+
+	ret = fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
+	return ret;
+}
+
+/*
+ * Work out where the data for the subrequest should come from.  If there are
+ * cache operations attached to the request, the cache is asked; otherwise
+ * the data is downloaded from the server.
+ */
+static void netfs_single_cache_prepare_read(struct netfs_io_request *rreq,
+					    struct netfs_io_subrequest *subreq)
+{
+	struct netfs_cache_resources *cres = &rreq->cache_resources;
+
+	if (cres->ops) {
+		subreq->source = cres->ops->prepare_read(subreq, rreq->i_size);
+		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+	} else {
+		subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
+	}
+}
+
+/*
+ * Issue the subrequest as a read from the cache.  A hole in the cached data
+ * is treated as a failure (NETFS_READ_HOLE_FAIL) and completion is reported
+ * through netfs_cache_read_terminated().
+ */
+static void netfs_single_read_cache(struct netfs_io_request *rreq,
+				    struct netfs_io_subrequest *subreq)
+{
+	_enter("R=%08x[%x]", rreq->debug_id, subreq->debug_index);
+	netfs_stat(&netfs_n_rh_read);
+
+	rreq->cache_resources.ops->read(&rreq->cache_resources,
+					subreq->start, &subreq->io_iter,
+					NETFS_READ_HOLE_FAIL,
+					netfs_cache_read_terminated, subreq);
+}
+
+/*
+ * Perform a read to a buffer from the cache or the server. Only a single
+ * subreq is permitted as the object must be fetched in a single transaction.
+ *
+ * The subrequest covers the range 0 to rreq->len and is given a copy of
+ * rreq->buffer.iter.  Returns 0 if the read was issued, -ENOMEM if the
+ * subrequest couldn't be allocated, the error from ->prepare_read() if that
+ * fails, or -EIO if the source selected for the subrequest is neither the
+ * server nor the cache.
+ *
+ * NOTE(review): on the ->prepare_read() failure path the subrequest has
+ * already been added to stream->subrequests and NETFS_RREQ_ALL_QUEUED is not
+ * set; confirm that the collector and any waiter cope with that.
+ */
+static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
+{
+ struct netfs_io_stream *stream = &rreq->io_streams[0];
+ struct netfs_io_subrequest *subreq;
+ int ret = 0;
+
+ subreq = netfs_alloc_subrequest(rreq);
+ if (!subreq)
+ return -ENOMEM;
+
+ subreq->source = NETFS_SOURCE_UNKNOWN; /* Chosen by netfs_single_cache_prepare_read() below */
+ subreq->start = 0;
+ subreq->len = rreq->len;
+ subreq->io_iter = rreq->buffer.iter;
+
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+
+ spin_lock(&rreq->lock);
+ list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_added);
+ stream->front = subreq;
+ /* Store list pointers before active flag */
+ smp_store_release(&stream->active, true);
+ spin_unlock(&rreq->lock);
+
+ netfs_single_cache_prepare_read(rreq, subreq);
+ switch (subreq->source) {
+ case NETFS_DOWNLOAD_FROM_SERVER:
+ netfs_stat(&netfs_n_rh_download);
+ if (rreq->netfs_ops->prepare_read) { /* ->prepare_read() is optional */
+ ret = rreq->netfs_ops->prepare_read(subreq);
+ if (ret < 0)
+ goto cancel;
+ }
+
+ rreq->netfs_ops->issue_read(subreq);
+ rreq->submitted += subreq->len;
+ break;
+ case NETFS_READ_FROM_CACHE:
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+ netfs_single_read_cache(rreq, subreq);
+ rreq->submitted += subreq->len;
+ ret = 0;
+ break;
+ default:
+ pr_warn("Unexpected single-read source %u\n", subreq->source);
+ WARN_ON_ONCE(true);
+ ret = -EIO;
+ break;
+ }
+
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
+ return ret;
+cancel:
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ return ret;
+}
+
+/**
+ * netfs_read_single - Synchronously read a single blob of pages.
+ * @inode: The inode to read from.
+ * @file: The file we're using to read or NULL.
+ * @iter: The buffer we're reading into.
+ *
+ * Fulfil a read request for a single monolithic object by drawing data from
+ * the cache if possible, or the netfs if not. The buffer may be larger than
+ * the file content; any unused space beyond the EOF will be zero-filled. The
+ * content will be read with a single I/O request (though this may be retried).
+ *
+ * The calling netfs must initialise a netfs context contiguous to the vfs
+ * inode before calling this.
+ *
+ * This is usable whether or not caching is enabled. If caching is enabled,
+ * the data will be stored as a single object into the cache.
+ *
+ * Return: The result of netfs_wait_for_read() once the read has been
+ * dispatched, or a negative error code if the request couldn't be allocated
+ * or if starting the cache operation failed with -ENOMEM, -EINTR or
+ * -ERESTARTSYS.
+ */
+ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter)
+{
+ struct netfs_io_request *rreq;
+ struct netfs_inode *ictx = netfs_inode(inode);
+ ssize_t ret;
+
+ rreq = netfs_alloc_request(inode->i_mapping, file, 0, iov_iter_count(iter),
+ NETFS_READ_SINGLE);
+ if (IS_ERR(rreq))
+ return PTR_ERR(rreq);
+
+ ret = netfs_single_begin_cache_read(rreq, ictx);
+ if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) /* Other results don't abort the read */
+ goto cleanup_free;
+
+ netfs_stat(&netfs_n_rh_read_single);
+ trace_netfs_read(rreq, 0, rreq->len, netfs_read_trace_read_single);
+
+ rreq->buffer.iter = *iter;
+ netfs_single_dispatch_read(rreq); /* NOTE(review): return value is ignored - confirm a dispatch failure can't leave the wait below stuck */
+
+ ret = netfs_wait_for_read(rreq);
+ netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
+ return ret;
+
+cleanup_free:
+ netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
+ return ret;
+}
+EXPORT_SYMBOL(netfs_read_single);
diff --git a/fs/netfs/rolling_buffer.c b/fs/netfs/rolling_buffer.c
new file mode 100644
index 000000000000..75d97af14b4a
--- /dev/null
+++ b/fs/netfs/rolling_buffer.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Rolling buffer helpers
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/bitops.h>
+#include <linux/pagemap.h>
+#include <linux/rolling_buffer.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+static atomic_t debug_ids;
+
+/**
+ * netfs_folioq_alloc - Allocate a folio_queue struct
+ * @rreq_id: Associated debugging ID for tracing purposes
+ * @gfp: Allocation constraints
+ * @trace: Trace tag to indicate the purpose of the allocation
+ *
+ * Allocate a folio_queue struct, initialise it, account it in the netfs
+ * stats, give it a fresh debug ID and log a trace line to mark the
+ * allocation.
+ *
+ * Return: The new folio_queue, or NULL if the allocation failed.
+ */
+struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp,
+				       unsigned int /*enum netfs_folioq_trace*/ trace)
+{
+	struct folio_queue *folioq = kmalloc(sizeof(*folioq), gfp);
+
+	if (!folioq)
+		return NULL;
+
+	netfs_stat(&netfs_n_folioq);
+	folioq_init(folioq, rreq_id);
+	folioq->debug_id = atomic_inc_return(&debug_ids);
+	trace_netfs_folioq(folioq, trace);
+	return folioq;
+}
+EXPORT_SYMBOL(netfs_folioq_alloc);
+
+/**
+ * netfs_folioq_free - Free a folio_queue struct
+ * @folioq: The object to free
+ * @trace: Trace tag to indicate which free
+ *
+ * Free and unaccount the folio_queue struct.  The trace line is logged
+ * first, before the stat counter is decremented and the memory is released
+ * with kfree().
+ */
+void netfs_folioq_free(struct folio_queue *folioq,
+ unsigned int /*enum netfs_trace_folioq*/ trace)
+{
+ trace_netfs_folioq(folioq, trace);
+ netfs_stat_d(&netfs_n_folioq);
+ kfree(folioq);
+}
+EXPORT_SYMBOL(netfs_folioq_free);
+
+/*
+ * Initialise a rolling buffer.  We allocate an empty folio queue struct so
+ * that the pointers can be independently driven by the producer and the
+ * consumer.
+ *
+ * Both the head and the tail are pointed at the new segment and the buffer's
+ * iterator is set up over it with nothing in it.  Returns 0 on success or
+ * -ENOMEM if the segment couldn't be allocated.
+ */
+int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id,
+			unsigned int direction)
+{
+	struct folio_queue *first = netfs_folioq_alloc(rreq_id, GFP_NOFS,
+						       netfs_trace_folioq_rollbuf_init);
+
+	if (!first)
+		return -ENOMEM;
+
+	roll->tail = first;
+	roll->head = first;
+	iov_iter_folio_queue(&roll->iter, direction, first, 0, 0, 0);
+	return 0;
+}
+
+/*
+ * Add another folio_queue to a rolling buffer if there's no space left.
+ *
+ * Returns 0 if the head segment still has room or if a new segment was
+ * added, or -ENOMEM if a new segment couldn't be allocated.  The new segment
+ * becomes roll->head and is only published through the old head's ->next
+ * pointer as the final step.
+ */
+int rolling_buffer_make_space(struct rolling_buffer *roll)
+{
+ struct folio_queue *fq, *head = roll->head;
+
+ if (!folioq_full(head))
+ return 0;
+
+ fq = netfs_folioq_alloc(head->rreq_id, GFP_NOFS, netfs_trace_folioq_make_space);
+ if (!fq)
+ return -ENOMEM;
+ fq->prev = head;
+
+ roll->head = fq;
+ if (folioq_full(head)) { /* NOTE(review): repeats the check made on entry - confirm whether it can differ */
+ /* Make sure we don't leave the master iterator pointing to a
+ * block that might get immediately consumed.
+ */
+ if (roll->iter.folioq == head &&
+ roll->iter.folioq_slot == folioq_nr_slots(head)) {
+ roll->iter.folioq = fq;
+ roll->iter.folioq_slot = 0;
+ }
+ }
+
+ /* Make sure the initialisation is stored before the next pointer.
+ *
+ * [!] NOTE: After we set head->next, the consumer is at liberty to
+ * immediately delete the old head.
+ */
+ smp_store_release(&head->next, fq);
+ return 0;
+}
+
+/*
+ * Decant the list of folios to read into a rolling buffer.
+ *
+ * Folios are taken from @ractl with __readahead_batch() and placed in the
+ * unused slots of the head segment, a new segment being added first if the
+ * head is full.  The order of each folio is recorded and the folio is added
+ * to @put_batch, which is released whenever folio_batch_add() returns 0.
+ *
+ * Returns the total size in bytes of the folios added, or -ENOMEM if space
+ * couldn't be made.
+ */
+ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll,
+ struct readahead_control *ractl,
+ struct folio_batch *put_batch)
+{
+ struct folio_queue *fq;
+ struct page **vec;
+ int nr, ix, to;
+ ssize_t size = 0;
+
+ if (rolling_buffer_make_space(roll) < 0)
+ return -ENOMEM;
+
+ fq = roll->head;
+ vec = (struct page **)fq->vec.folios;
+ nr = __readahead_batch(ractl, vec + folio_batch_count(&fq->vec),
+ folio_batch_space(&fq->vec));
+ ix = fq->vec.nr; /* First newly filled slot */
+ to = ix + nr; /* One past the last newly filled slot */
+ fq->vec.nr = to;
+ for (; ix < to; ix++) {
+ struct folio *folio = folioq_folio(fq, ix);
+ unsigned int order = folio_order(folio);
+
+ fq->orders[ix] = order;
+ size += PAGE_SIZE << order;
+ trace_netfs_folio(folio, netfs_folio_trace_read);
+ if (!folio_batch_add(put_batch, folio))
+ folio_batch_release(put_batch);
+ }
+ WRITE_ONCE(roll->iter.count, roll->iter.count + size);
+
+ /* Store the counter after setting the slot. */
+ smp_store_release(&roll->next_head_slot, to);
+
+ for (; ix < folioq_nr_slots(fq); ix++) /* Clear the slots that weren't filled */
+ folioq_clear(fq, ix);
+
+ return size;
+}
+
+/*
+ * Append a folio to the rolling buffer.
+ *
+ * A new segment is added first if the head segment is full.  @flags may
+ * include ROLLBUF_MARK_1 and/or ROLLBUF_MARK_2 to have folioq_mark() and/or
+ * folioq_mark2() applied to the slot used.  The iterator count is increased
+ * by the size of the folio.
+ *
+ * Returns the size of the folio, or -ENOMEM if space couldn't be made.
+ *
+ * NOTE(review): next_head_slot is set here to the value returned by
+ * folioq_append() (presumably the slot just filled), whereas
+ * rolling_buffer_load_from_ra() sets it to one past the last slot it filled;
+ * confirm which is intended.
+ */
+ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio,
+ unsigned int flags)
+{
+ ssize_t size = folio_size(folio);
+ int slot;
+
+ if (rolling_buffer_make_space(roll) < 0)
+ return -ENOMEM;
+
+ slot = folioq_append(roll->head, folio);
+ if (flags & ROLLBUF_MARK_1)
+ folioq_mark(roll->head, slot);
+ if (flags & ROLLBUF_MARK_2)
+ folioq_mark2(roll->head, slot);
+
+ WRITE_ONCE(roll->iter.count, roll->iter.count + size);
+
+ /* Store the counter after setting the slot. */
+ smp_store_release(&roll->next_head_slot, slot);
+ return size;
+}
+
+/*
+ * Delete a spent buffer from a rolling queue and return the next in line.  We
+ * don't return the last buffer to keep the pointers independent, but return
+ * NULL instead.
+ *
+ * When there is a successor, its back pointer is cleared, the spent segment
+ * is freed and the successor becomes the new tail.
+ */
+struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll)
+{
+	struct folio_queue *old_tail = roll->tail;
+	struct folio_queue *successor = READ_ONCE(old_tail->next);
+
+	if (successor) {
+		successor->prev = NULL;
+		netfs_folioq_free(old_tail, netfs_trace_folioq_delete);
+		roll->tail = successor;
+	}
+	return successor;
+}
+
+/*
+ * Clear out a rolling queue.  Folios that have mark 1 set are put.
+ *
+ * Every segment from the tail onwards is unhooked and freed; marked folios
+ * are gathered into a batch that is released whenever folio_batch_add()
+ * returns 0 and once more at the end.
+ */
+void rolling_buffer_clear(struct rolling_buffer *roll)
+{
+	struct folio_batch to_put;
+	struct folio_queue *fq;
+
+	folio_batch_init(&to_put);
+
+	for (fq = roll->tail; fq; fq = roll->tail) {
+		int slot;
+
+		/* Unhook the segment before tearing it down. */
+		roll->tail = fq->next;
+
+		for (slot = 0; slot < folioq_count(fq); slot++) {
+			struct folio *folio = folioq_folio(fq, slot);
+
+			if (!folio || !folioq_is_marked(fq, slot))
+				continue;
+
+			trace_netfs_folio(folio, netfs_folio_trace_put);
+			if (!folio_batch_add(&to_put, folio))
+				folio_batch_release(&to_put);
+		}
+
+		netfs_folioq_free(fq, netfs_trace_folioq_clear);
+	}
+
+	folio_batch_release(&to_put);
+}
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index 8e63516b40f6..f1af344266cc 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -12,6 +12,7 @@
atomic_t netfs_n_rh_dio_read;
atomic_t netfs_n_rh_readahead;
atomic_t netfs_n_rh_read_folio;
+atomic_t netfs_n_rh_read_single;
atomic_t netfs_n_rh_rreq;
atomic_t netfs_n_rh_sreq;
atomic_t netfs_n_rh_download;
@@ -46,10 +47,11 @@ atomic_t netfs_n_folioq;
int netfs_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "Reads : DR=%u RA=%u RF=%u WB=%u WBZ=%u\n",
+ seq_printf(m, "Reads : DR=%u RA=%u RF=%u RS=%u WB=%u WBZ=%u\n",
atomic_read(&netfs_n_rh_dio_read),
atomic_read(&netfs_n_rh_readahead),
atomic_read(&netfs_n_rh_read_folio),
+ atomic_read(&netfs_n_rh_read_single),
atomic_read(&netfs_n_rh_write_begin),
atomic_read(&netfs_n_rh_write_zskip));
seq_printf(m, "Writes : BW=%u WT=%u DW=%u WP=%u 2C=%u\n",
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index ca3a11ed9b54..294f67795f79 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -17,10 +17,38 @@
#define HIT_PENDING 0x01 /* A front op was still pending */
#define NEED_REASSESS 0x02 /* Need to loop round and reassess */
#define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */
-#define BUFFERED 0x08 /* The pagecache needs cleaning up */
+#define NEED_UNLOCK 0x08 /* The pagecache needs unlocking */
#define NEED_RETRY 0x10 /* A front op requests retrying */
#define SAW_FAILURE 0x20 /* One stream or hit a permanent failure */
+/*
+ * Dump the state of a request, each of its I/O streams and every subrequest
+ * queued on those streams to the kernel log at error level for debugging.
+ */
+static void netfs_dump_request(const struct netfs_io_request *rreq)
+{
+	int nr;
+
+	pr_err("Request R=%08x r=%d fl=%lx or=%x e=%ld\n",
+	       rreq->debug_id, refcount_read(&rreq->ref), rreq->flags,
+	       rreq->origin, rreq->error);
+	pr_err(" st=%llx tsl=%zx/%llx/%llx\n",
+	       rreq->start, rreq->transferred, rreq->submitted, rreq->len);
+	pr_err(" cci=%llx/%llx/%llx\n",
+	       rreq->cleaned_to, rreq->collected_to, atomic64_read(&rreq->issued_to));
+	pr_err(" iw=%pSR\n", rreq->netfs_ops->issue_write);
+
+	for (nr = 0; nr < NR_IO_STREAMS; nr++) {
+		const struct netfs_io_stream *stream = &rreq->io_streams[nr];
+		const struct netfs_io_subrequest *sub;
+
+		pr_err(" str[%x] s=%x e=%d acnf=%u,%u,%u,%u\n",
+		       stream->stream_nr, stream->source, stream->error,
+		       stream->avail, stream->active, stream->need_retry,
+		       stream->failed);
+		pr_err(" str[%x] ct=%llx t=%zx\n",
+		       stream->stream_nr, stream->collected_to, stream->transferred);
+
+		list_for_each_entry(sub, &stream->subrequests, rreq_link) {
+			pr_err(" sreq[%x:%x] sc=%u s=%llx t=%zx/%zx r=%d f=%lx\n",
+			       sub->stream_nr, sub->debug_index, sub->source,
+			       sub->start, sub->transferred, sub->len,
+			       refcount_read(&sub->ref), sub->flags);
+		}
+	}
+}
+
/*
* Successful completion of write of a folio to the server and/or cache. Note
* that we are not allowed to lock the folio here on pain of deadlocking with
@@ -83,9 +111,15 @@ end_wb:
static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
unsigned int *notes)
{
- struct folio_queue *folioq = wreq->buffer;
+ struct folio_queue *folioq = wreq->buffer.tail;
unsigned long long collected_to = wreq->collected_to;
- unsigned int slot = wreq->buffer_head_slot;
+ unsigned int slot = wreq->buffer.first_tail_slot;
+
+ if (WARN_ON_ONCE(!folioq)) {
+ pr_err("[!] Writeback unlock found empty rolling buffer!\n");
+ netfs_dump_request(wreq);
+ return;
+ }
if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) {
if (netfs_pgpriv2_unlock_copied_folios(wreq))
@@ -94,7 +128,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
}
if (slot >= folioq_nr_slots(folioq)) {
- folioq = netfs_delete_buffer_head(wreq);
+ folioq = rolling_buffer_delete_spent(&wreq->buffer);
+ if (!folioq)
+ return;
slot = 0;
}
@@ -134,9 +170,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
folioq_clear(folioq, slot);
slot++;
if (slot >= folioq_nr_slots(folioq)) {
- if (READ_ONCE(wreq->buffer_tail) == folioq)
- break;
- folioq = netfs_delete_buffer_head(wreq);
+ folioq = rolling_buffer_delete_spent(&wreq->buffer);
+ if (!folioq)
+ goto done;
slot = 0;
}
@@ -144,222 +180,9 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
break;
}
- wreq->buffer = folioq;
- wreq->buffer_head_slot = slot;
-}
-
-/*
- * Perform retries on the streams that need it.
- */
-static void netfs_retry_write_stream(struct netfs_io_request *wreq,
- struct netfs_io_stream *stream)
-{
- struct list_head *next;
-
- _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
-
- if (list_empty(&stream->subrequests))
- return;
-
- if (stream->source == NETFS_UPLOAD_TO_SERVER &&
- wreq->netfs_ops->retry_request)
- wreq->netfs_ops->retry_request(wreq, stream);
-
- if (unlikely(stream->failed))
- return;
-
- /* If there's no renegotiation to do, just resend each failed subreq. */
- if (!stream->prepare_write) {
- struct netfs_io_subrequest *subreq;
-
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
- break;
- if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
- struct iov_iter source = subreq->io_iter;
-
- iov_iter_revert(&source, subreq->len - source.count);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
- netfs_reissue_write(stream, subreq, &source);
- }
- }
- return;
- }
-
- next = stream->subrequests.next;
-
- do {
- struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
- struct iov_iter source;
- unsigned long long start, len;
- size_t part;
- bool boundary = false;
-
- /* Go through the stream and find the next span of contiguous
- * data that we then rejig (cifs, for example, needs the wsize
- * renegotiating) and reissue.
- */
- from = list_entry(next, struct netfs_io_subrequest, rreq_link);
- to = from;
- start = from->start + from->transferred;
- len = from->len - from->transferred;
-
- if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
- !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
- return;
-
- list_for_each_continue(next, &stream->subrequests) {
- subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
- if (subreq->start + subreq->transferred != start + len ||
- test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
- !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
- break;
- to = subreq;
- len += to->len;
- }
-
- /* Determine the set of buffers we're going to use. Each
- * subreq gets a subset of a single overall contiguous buffer.
- */
- netfs_reset_iter(from);
- source = from->io_iter;
- source.count = len;
-
- /* Work through the sublist. */
- subreq = from;
- list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
- if (!len)
- break;
- /* Renegotiate max_len (wsize) */
- trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
- __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- subreq->retry_count++;
- stream->prepare_write(subreq);
-
- part = min(len, stream->sreq_max_len);
- subreq->len = part;
- subreq->start = start;
- subreq->transferred = 0;
- len -= part;
- start += part;
- if (len && subreq == to &&
- __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
- boundary = true;
-
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
- netfs_reissue_write(stream, subreq, &source);
- if (subreq == to)
- break;
- }
-
- /* If we managed to use fewer subreqs, we can discard the
- * excess; if we used the same number, then we're done.
- */
- if (!len) {
- if (subreq == to)
- continue;
- list_for_each_entry_safe_from(subreq, tmp,
- &stream->subrequests, rreq_link) {
- trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
- list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
- if (subreq == to)
- break;
- }
- continue;
- }
-
- /* We ran out of subrequests, so we need to allocate some more
- * and insert them after.
- */
- do {
- subreq = netfs_alloc_subrequest(wreq);
- subreq->source = to->source;
- subreq->start = start;
- subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
- subreq->stream_nr = to->stream_nr;
- subreq->retry_count = 1;
-
- trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
- refcount_read(&subreq->ref),
- netfs_sreq_trace_new);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
-
- list_add(&subreq->rreq_link, &to->rreq_link);
- to = list_next_entry(to, rreq_link);
- trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
-
- stream->sreq_max_len = len;
- stream->sreq_max_segs = INT_MAX;
- switch (stream->source) {
- case NETFS_UPLOAD_TO_SERVER:
- netfs_stat(&netfs_n_wh_upload);
- stream->sreq_max_len = umin(len, wreq->wsize);
- break;
- case NETFS_WRITE_TO_CACHE:
- netfs_stat(&netfs_n_wh_write);
- break;
- default:
- WARN_ON_ONCE(1);
- }
-
- stream->prepare_write(subreq);
-
- part = umin(len, stream->sreq_max_len);
- subreq->len = subreq->transferred + part;
- len -= part;
- start += part;
- if (!len && boundary) {
- __set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
- boundary = false;
- }
-
- netfs_reissue_write(stream, subreq, &source);
- if (!len)
- break;
-
- } while (len);
-
- } while (!list_is_head(next, &stream->subrequests));
-}
-
-/*
- * Perform retries on the streams that need it. If we're doing content
- * encryption and the server copy changed due to a third-party write, we may
- * need to do an RMW cycle and also rewrite the data to the cache.
- */
-static void netfs_retry_writes(struct netfs_io_request *wreq)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream;
- int s;
-
- /* Wait for all outstanding I/O to quiesce before performing retries as
- * we may need to renegotiate the I/O sizes.
- */
- for (s = 0; s < NR_IO_STREAMS; s++) {
- stream = &wreq->io_streams[s];
- if (!stream->active)
- continue;
-
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- }
- }
-
- // TODO: Enc: Fetch changed partial pages
- // TODO: Enc: Reencrypt content if needed.
- // TODO: Enc: Wind back transferred point.
- // TODO: Enc: Mark cache pages for retry.
-
- for (s = 0; s < NR_IO_STREAMS; s++) {
- stream = &wreq->io_streams[s];
- if (stream->need_retry) {
- stream->need_retry = false;
- netfs_retry_write_stream(wreq, stream);
- }
- }
+ wreq->buffer.tail = folioq;
+done:
+ wreq->buffer.first_tail_slot = slot;
}
/*
@@ -390,7 +213,7 @@ reassess_streams:
if (wreq->origin == NETFS_WRITEBACK ||
wreq->origin == NETFS_WRITETHROUGH ||
wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE)
- notes = BUFFERED;
+ notes = NEED_UNLOCK;
else
notes = 0;
@@ -449,14 +272,14 @@ reassess_streams:
cancel:
/* Remove if completely consumed. */
- spin_lock_bh(&wreq->lock);
+ spin_lock(&wreq->lock);
remove = front;
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
stream->front = front;
- spin_unlock_bh(&wreq->lock);
+ spin_unlock(&wreq->lock);
netfs_put_subrequest(remove, false,
notes & SAW_FAILURE ?
netfs_sreq_trace_put_cancel :
@@ -487,7 +310,7 @@ reassess_streams:
trace_netfs_collect_state(wreq, wreq->collected_to, notes);
/* Unlock any folios that we have now finished with. */
- if (notes & BUFFERED) {
+ if (notes & NEED_UNLOCK) {
if (wreq->cleaned_to < wreq->collected_to)
netfs_writeback_unlock_folios(wreq, &notes);
} else {
@@ -500,7 +323,9 @@ reassess_streams:
goto need_retry;
if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
- clear_and_wake_up_bit(NETFS_RREQ_PAUSE, &wreq->flags);
+ clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
+ smp_mb__after_atomic(); /* Set PAUSE before task state */
+ wake_up(&wreq->waitq);
}
if (notes & NEED_REASSESS) {
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index ff0e82505a0b..69727411683e 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -94,9 +94,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
{
struct netfs_io_request *wreq;
struct netfs_inode *ictx;
- bool is_buffered = (origin == NETFS_WRITEBACK ||
- origin == NETFS_WRITETHROUGH ||
- origin == NETFS_PGPRIV2_COPY_TO_CACHE);
+ bool is_cacheable = (origin == NETFS_WRITEBACK ||
+ origin == NETFS_WRITEBACK_SINGLE ||
+ origin == NETFS_WRITETHROUGH ||
+ origin == NETFS_PGPRIV2_COPY_TO_CACHE);
wreq = netfs_alloc_request(mapping, file, start, 0, origin);
if (IS_ERR(wreq))
@@ -105,8 +106,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
_enter("R=%x", wreq->debug_id);
ictx = netfs_inode(wreq->inode);
- if (is_buffered && netfs_is_cache_enabled(ictx))
+ if (is_cacheable && netfs_is_cache_enabled(ictx))
fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
+ if (rolling_buffer_init(&wreq->buffer, wreq->debug_id, ITER_SOURCE) < 0)
+ goto nomem;
wreq->cleaned_to = wreq->start;
@@ -129,6 +132,10 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
}
return wreq;
+nomem:
+ wreq->error = -ENOMEM;
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_failed);
+ return ERR_PTR(-ENOMEM);
}
/**
@@ -153,16 +160,15 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
loff_t start)
{
struct netfs_io_subrequest *subreq;
- struct iov_iter *wreq_iter = &wreq->io_iter;
+ struct iov_iter *wreq_iter = &wreq->buffer.iter;
/* Make sure we don't point the iterator at a used-up folio_queue
* struct being used as a placeholder to prevent the queue from
* collapsing. In such a case, extend the queue.
*/
if (iov_iter_is_folioq(wreq_iter) &&
- wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq)) {
- netfs_buffer_make_space(wreq);
- }
+ wreq_iter->folioq_slot >= folioq_nr_slots(wreq_iter->folioq))
+ rolling_buffer_make_space(&wreq->buffer);
subreq = netfs_alloc_subrequest(wreq);
subreq->source = stream->source;
@@ -198,7 +204,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
* the list. The collector only goes nextwards and uses the lock to
* remove entries off of the front.
*/
- spin_lock_bh(&wreq->lock);
+ spin_lock(&wreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
stream->front = subreq;
@@ -209,7 +215,7 @@ static void netfs_prepare_write(struct netfs_io_request *wreq,
}
}
- spin_unlock_bh(&wreq->lock);
+ spin_unlock(&wreq->lock);
stream->construct = subreq;
}
@@ -268,9 +274,9 @@ void netfs_issue_write(struct netfs_io_request *wreq,
* we can avoid overrunning the credits obtained (cifs) and try to parallelise
* content-crypto preparation with network writes.
*/
-int netfs_advance_write(struct netfs_io_request *wreq,
- struct netfs_io_stream *stream,
- loff_t start, size_t len, bool to_eof)
+size_t netfs_advance_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start, size_t len, bool to_eof)
{
struct netfs_io_subrequest *subreq = stream->construct;
size_t part;
@@ -327,6 +333,9 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
_enter("");
+ if (rolling_buffer_make_space(&wreq->buffer) < 0)
+ return -ENOMEM;
+
/* netfs_perform_write() may shift i_size around the page or from out
* of the page to beyond it, but cannot move i_size into or through the
* page since we have it locked.
@@ -431,7 +440,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
/* Attach the folio to the rolling buffer. */
- netfs_buffer_append_folio(wreq, folio, false);
+ rolling_buffer_append(&wreq->buffer, folio, 0);
/* Move the submission point forward to allow for write-streaming data
* not starting at the front of the page. We don't do write-streaming
@@ -444,7 +453,8 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
stream = &wreq->io_streams[s];
stream->submit_off = foff;
stream->submit_len = flen;
- if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
+ if (!stream->avail ||
+ (stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
(stream->source == NETFS_UPLOAD_TO_SERVER &&
fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
stream->submit_off = UINT_MAX;
@@ -478,7 +488,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
/* Advance the iterator(s). */
if (stream->submit_off > iter_off) {
- iov_iter_advance(&wreq->io_iter, stream->submit_off - iter_off);
+ rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
iter_off = stream->submit_off;
}
@@ -496,7 +506,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
}
if (fsize > iter_off)
- iov_iter_advance(&wreq->io_iter, fsize - iter_off);
+ rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
atomic64_set(&wreq->issued_to, fpos + fsize);
if (!debug)
@@ -635,7 +645,7 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
struct folio **writethrough_cache)
{
_enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
- wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
+ wreq->debug_id, wreq->buffer.iter.count, wreq->wsize, copied, to_page_end);
if (!*writethrough_cache) {
if (folio_test_dirty(folio))
@@ -710,10 +720,10 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
part = netfs_advance_write(wreq, upload, start, len, false);
start += part;
len -= part;
- iov_iter_advance(&wreq->io_iter, part);
+ rolling_buffer_advance(&wreq->buffer, part);
if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
- wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
+ wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags));
}
if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
break;
@@ -723,3 +733,194 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
_leave(" = %d", error);
return error;
}
+
+/*
+ * Write some of a pending folio data back to the server and/or the cache.
+ *
+ * The amount to write is trimmed to wreq->i_size, with the part of the folio
+ * beyond that being zeroed.  The folio is then appended to the request's
+ * rolling buffer (with a ref taken on it first) and handed, from offset 0, to
+ * each available stream through netfs_advance_write().
+ *
+ * Returns 0 on every path visible here, including the case where neither
+ * stream is available and nothing is queued.
+ */
+static int netfs_write_folio_single(struct netfs_io_request *wreq,
+ struct folio *folio)
+{
+ struct netfs_io_stream *upload = &wreq->io_streams[0];
+ struct netfs_io_stream *cache = &wreq->io_streams[1];
+ struct netfs_io_stream *stream;
+ size_t iter_off = 0;
+ size_t fsize = folio_size(folio), flen;
+ loff_t fpos = folio_pos(folio);
+ bool to_eof = false;
+ bool no_debug = false;
+
+ _enter("");
+
+ flen = folio_size(folio);
+ if (flen > wreq->i_size - fpos) { /* Folio extends past wreq->i_size */
+ flen = wreq->i_size - fpos;
+ folio_zero_segment(folio, flen, fsize);
+ to_eof = true;
+ } else if (flen == wreq->i_size - fpos) { /* Folio ends exactly at wreq->i_size */
+ to_eof = true;
+ }
+
+ _debug("folio %zx/%zx", flen, fsize);
+
+ if (!upload->avail && !cache->avail) { /* Nowhere to write the data to */
+ trace_netfs_folio(folio, netfs_folio_trace_cancel_store);
+ return 0;
+ }
+
+ if (!upload->construct)
+ trace_netfs_folio(folio, netfs_folio_trace_store);
+ else
+ trace_netfs_folio(folio, netfs_folio_trace_store_plus);
+
+ /* Attach the folio to the rolling buffer. */
+ folio_get(folio);
+ rolling_buffer_append(&wreq->buffer, folio, NETFS_ROLLBUF_PUT_MARK);
+
+ /* Set up what each stream is to submit from this folio: everything
+ * from offset 0 up to flen.  A stream that isn't available is given
+ * nothing to submit (submit_off of UINT_MAX and submit_len of 0) so
+ * that the selection loop below never picks it.
+ */
+ for (int s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ stream->submit_off = 0;
+ stream->submit_len = flen;
+ if (!stream->avail) {
+ stream->submit_off = UINT_MAX;
+ stream->submit_len = 0;
+ }
+ }
+
+ /* Attach the folio to one or more subrequests. For a big folio, we
+ * could end up with thousands of subrequests if the wsize is small -
+ * but we might need to wait during the creation of subrequests for
+ * network resources (eg. SMB credits).
+ */
+ for (;;) {
+ ssize_t part;
+ size_t lowest_off = ULONG_MAX;
+ int choose_s = -1;
+
+ /* Always add to the lowest-submitted stream first. */
+ for (int s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->submit_len > 0 &&
+ stream->submit_off < lowest_off) {
+ lowest_off = stream->submit_off;
+ choose_s = s;
+ }
+ }
+
+ if (choose_s < 0) /* No stream has anything left to submit */
+ break;
+ stream = &wreq->io_streams[choose_s];
+
+ /* Advance the iterator(s). */
+ if (stream->submit_off > iter_off) {
+ rolling_buffer_advance(&wreq->buffer, stream->submit_off - iter_off);
+ iter_off = stream->submit_off;
+ }
+
+ atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
+ stream->submit_extendable_to = fsize - stream->submit_off;
+ part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
+ stream->submit_len, to_eof);
+ stream->submit_off += part;
+ if (part > stream->submit_len) /* Clamp rather than let submit_len underflow */
+ stream->submit_len = 0;
+ else
+ stream->submit_len -= part;
+ if (part > 0)
+ no_debug = true;
+ }
+
+ wreq->buffer.iter.iov_offset = 0;
+ if (fsize > iter_off) /* Step the buffer iterator over the rest of the folio */
+ rolling_buffer_advance(&wreq->buffer, fsize - iter_off);
+ atomic64_set(&wreq->issued_to, fpos + fsize);
+
+ if (!no_debug)
+ kdebug("R=%x: No submit", wreq->debug_id);
+ _leave(" = 0");
+ return 0;
+}
+
+/**
+ * netfs_writeback_single - Write back a monolithic payload
+ * @mapping: The mapping to write from
+ * @wbc: Hints from the VM
+ * @iter: Data to write, must be ITER_FOLIOQ.
+ *
+ * Write a monolithic, non-pagecache object back to the server and/or
+ * the cache.
+ *
+ * The inode's wb_lock is held whilst the folios in @iter are queued.  If the
+ * lock is contended and @wbc->sync_mode is WB_SYNC_NONE, the write is skipped.
+ *
+ * Return: 0 if all the folios were queued, if @iter held no folios or if the
+ * write was skipped due to lock contention; -EIO if @iter is not a folio-queue
+ * iterator; otherwise the error from creating the write request or from
+ * queuing a folio.
+ */
+int netfs_writeback_single(struct address_space *mapping,
+			   struct writeback_control *wbc,
+			   struct iov_iter *iter)
+{
+	struct netfs_io_request *wreq;
+	struct netfs_inode *ictx = netfs_inode(mapping->host);
+	struct folio_queue *fq;
+	size_t size = iov_iter_count(iter);
+	/* Must start at 0: if @iter holds no folios, the loop below never
+	 * assigns it and it would otherwise be returned uninitialised.
+	 */
+	int ret = 0;
+
+	if (WARN_ON_ONCE(!iov_iter_is_folioq(iter)))
+		return -EIO;
+
+	if (!mutex_trylock(&ictx->wb_lock)) {
+		if (wbc->sync_mode == WB_SYNC_NONE) {
+			netfs_stat(&netfs_n_wb_lock_skip);
+			return 0;
+		}
+		netfs_stat(&netfs_n_wb_lock_wait);
+		mutex_lock(&ictx->wb_lock);
+	}
+
+	wreq = netfs_create_write_req(mapping, NULL, 0, NETFS_WRITEBACK_SINGLE);
+	if (IS_ERR(wreq)) {
+		ret = PTR_ERR(wreq);
+		goto couldnt_start;
+	}
+
+	trace_netfs_write(wreq, netfs_write_trace_writeback);
+	netfs_stat(&netfs_n_wh_writepages);
+
+	if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
+		wreq->netfs_ops->begin_writeback(wreq);
+
+	/* Walk every occupied slot of every segment, queuing each folio in
+	 * turn until the iterator's byte count has been used up.
+	 */
+	for (fq = (struct folio_queue *)iter->folioq; fq; fq = fq->next) {
+		for (int slot = 0; slot < folioq_count(fq); slot++) {
+			struct folio *folio = folioq_folio(fq, slot);
+			size_t part = umin(folioq_folio_size(fq, slot), size);
+
+			_debug("wbiter %lx %llx", folio->index, atomic64_read(&wreq->issued_to));
+
+			ret = netfs_write_folio_single(wreq, folio);
+			if (ret < 0)
+				goto stop;
+			size -= part;
+			if (size <= 0)
+				goto stop;
+		}
+	}
+
+stop:
+	/* Flush whatever each stream has under construction. */
+	for (int s = 0; s < NR_IO_STREAMS; s++)
+		netfs_issue_write(wreq, &wreq->io_streams[s]);
+	smp_wmb(); /* Write lists before ALL_QUEUED. */
+	set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+
+	mutex_unlock(&ictx->wb_lock);
+
+	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+	_leave(" = %d", ret);
+	return ret;
+
+couldnt_start:
+	mutex_unlock(&ictx->wb_lock);
+	_leave(" = %d", ret);
+	return ret;
+}
+EXPORT_SYMBOL(netfs_writeback_single);
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
new file mode 100644
index 000000000000..c841a851dd73
--- /dev/null
+++ b/fs/netfs/write_retry.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem write retrying.
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Perform retries on the streams that need it.
+ *
+ * Nothing is done if the stream has no subrequests or is marked failed (the
+ * filesystem's ->retry_request() hook, if there is one, is called first for
+ * an upload stream).
+ *
+ * If the stream has no ->prepare_write() hook, each subrequest flagged
+ * NEED_RETRY is reissued as it stands, stopping at the first one flagged
+ * FAILED.  Otherwise, runs of contiguous retryable subrequests are merged into
+ * a single span which is then split up again according to the limits left in
+ * the stream after ->prepare_write() has been called: existing subrequests are
+ * reused, surplus ones are discarded and extra ones are allocated if the
+ * existing ones don't cover the span.
+ */
+static void netfs_retry_write_stream(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream)
+{
+ struct list_head *next;
+
+ _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
+
+ if (list_empty(&stream->subrequests))
+ return;
+
+ if (stream->source == NETFS_UPLOAD_TO_SERVER &&
+ wreq->netfs_ops->retry_request)
+ wreq->netfs_ops->retry_request(wreq, stream);
+
+ if (unlikely(stream->failed))
+ return;
+
+ /* If there's no renegotiation to do, just resend each failed subreq. */
+ if (!stream->prepare_write) {
+ struct netfs_io_subrequest *subreq;
+
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
+ break;
+ if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+ struct iov_iter source = subreq->io_iter;
+
+ iov_iter_revert(&source, subreq->len - source.count); /* Rewind the copy by what was consumed */
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_reissue_write(stream, subreq, &source);
+ }
+ }
+ return;
+ }
+
+ next = stream->subrequests.next;
+
+ do {
+ struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
+ struct iov_iter source;
+ unsigned long long start, len;
+ size_t part;
+ bool boundary = false;
+
+ /* Go through the stream and find the next span of contiguous
+ * data that we then rejig (cifs, for example, needs the wsize
+ * renegotiating) and reissue.
+ */
+ from = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ to = from;
+ start = from->start + from->transferred;
+ len = from->len - from->transferred;
+
+ if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
+ !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
+ return;
+
+ list_for_each_continue(next, &stream->subrequests) {
+ subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ if (subreq->start + subreq->transferred != start + len ||
+ test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
+ !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
+ break;
+ to = subreq;
+ len += to->len;
+ }
+
+ /* Determine the set of buffers we're going to use. Each
+ * subreq gets a subset of a single overall contiguous buffer.
+ */
+ netfs_reset_iter(from);
+ source = from->io_iter;
+ source.count = len;
+
+ /* Work through the sublist. */
+ subreq = from;
+ list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
+ if (!len)
+ break;
+
+ subreq->start = start;
+ subreq->len = len;
+ __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ subreq->retry_count++;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+
+ /* Renegotiate max_len (wsize).
+ * NOTE(review): only sreq_max_len is reset before the hook is
+ * called here; sreq_max_segs keeps whatever value it last had,
+ * whereas the allocation loop further down resets both -
+ * confirm that this is intended.
+ */
+ stream->sreq_max_len = len;
+ stream->prepare_write(subreq);
+
+ part = umin(len, stream->sreq_max_len);
+ if (unlikely(stream->sreq_max_segs))
+ part = netfs_limit_iter(&source, 0, part, stream->sreq_max_segs);
+ subreq->len = part;
+ subreq->transferred = 0;
+ len -= part;
+ start += part;
+ if (len && subreq == to &&
+ __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
+ boundary = true; /* Reapply to the last subreq allocated below */
+
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_reissue_write(stream, subreq, &source);
+ if (subreq == to)
+ break;
+ }
+
+ /* If we managed to use fewer subreqs, we can discard the
+ * excess; if we used the same number, then we're done.
+ */
+ if (!len) {
+ if (subreq == to)
+ continue;
+ list_for_each_entry_safe_from(subreq, tmp,
+ &stream->subrequests, rreq_link) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
+ list_del(&subreq->rreq_link);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ if (subreq == to)
+ break;
+ }
+ continue;
+ }
+
+ /* We ran out of subrequests, so we need to allocate some more
+ * and insert them after.
+ */
+ do {
+ subreq = netfs_alloc_subrequest(wreq);
+ subreq->source = to->source;
+ subreq->start = start;
+ subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
+ subreq->stream_nr = to->stream_nr;
+ subreq->retry_count = 1;
+
+ trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
+ refcount_read(&subreq->ref),
+ netfs_sreq_trace_new);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+
+ list_add(&subreq->rreq_link, &to->rreq_link);
+ to = list_next_entry(to, rreq_link); /* 'to' now refers to the new subreq */
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+
+ stream->sreq_max_len = len;
+ stream->sreq_max_segs = INT_MAX;
+ switch (stream->source) {
+ case NETFS_UPLOAD_TO_SERVER:
+ netfs_stat(&netfs_n_wh_upload);
+ stream->sreq_max_len = umin(len, wreq->wsize);
+ break;
+ case NETFS_WRITE_TO_CACHE:
+ netfs_stat(&netfs_n_wh_write);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ stream->prepare_write(subreq);
+
+ part = umin(len, stream->sreq_max_len);
+ subreq->len = subreq->transferred + part;
+ len -= part;
+ start += part;
+ if (!len && boundary) {
+ __set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
+ boundary = false;
+ }
+
+ netfs_reissue_write(stream, subreq, &source);
+ if (!len)
+ break;
+
+ } while (len);
+
+ } while (!list_is_head(next, &stream->subrequests));
+}
+
+/*
+ * Retry the write streams of a request that have been flagged as needing it,
+ * having first waited for every subrequest on the active streams to stop
+ * being in progress.  If we're doing content encryption and the server copy
+ * changed due to a third-party write, we may need to do an RMW cycle and also
+ * rewrite the data to the cache.
+ */
+void netfs_retry_writes(struct netfs_io_request *wreq)
+{
+	/* Let all the outstanding I/O quiesce before any retry is attempted
+	 * as the I/O sizes may need to be renegotiated.
+	 */
+	for (int nr = 0; nr < NR_IO_STREAMS; nr++) {
+		struct netfs_io_stream *stream = &wreq->io_streams[nr];
+		struct netfs_io_subrequest *subreq;
+
+		if (stream->active) {
+			list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+				wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
+					    TASK_UNINTERRUPTIBLE);
+			}
+		}
+	}
+
+	// TODO: Enc: Fetch changed partial pages
+	// TODO: Enc: Reencrypt content if needed.
+	// TODO: Enc: Wind back transferred point.
+	// TODO: Enc: Mark cache pages for retry.
+
+	/* Clear the flag on each stream that asked for a retry, then retry it. */
+	for (int nr = 0; nr < NR_IO_STREAMS; nr++) {
+		struct netfs_io_stream *stream = &wreq->io_streams[nr];
+
+		if (!stream->need_retry)
+			continue;
+		stream->need_retry = false;
+		netfs_retry_write_stream(wreq, stream);
+	}
+}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index d49e4ce27999..e278a1ad1ca3 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -314,8 +314,10 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
&nfs_async_read_completion_ops);
netfs = nfs_netfs_alloc(sreq);
- if (!netfs)
- return netfs_read_subreq_terminated(sreq, -ENOMEM, false);
+ if (!netfs) {
+ sreq->error = -ENOMEM;
+ return netfs_read_subreq_terminated(sreq);
+ }
pgio.pg_netfs = netfs; /* used in completion */
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 772d485e96d3..9d86868f4998 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -74,7 +74,8 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
*/
netfs->sreq->transferred = min_t(s64, netfs->sreq->len,
atomic64_read(&netfs->transferred));
- netfs_read_subreq_terminated(netfs->sreq, netfs->error, false);
+ netfs->sreq->error = netfs->error;
+ netfs_read_subreq_terminated(netfs->sreq);
kfree(netfs);
}
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 6cb1e81993f8..000522a34985 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1258,14 +1258,6 @@ openRetry:
return rc;
}
-static void cifs_readv_worker(struct work_struct *work)
-{
- struct cifs_io_subrequest *rdata =
- container_of(work, struct cifs_io_subrequest, subreq.work);
-
- netfs_read_subreq_terminated(&rdata->subreq, rdata->result, false);
-}
-
static void
cifs_readv_callback(struct mid_q_entry *mid)
{
@@ -1330,11 +1322,13 @@ cifs_readv_callback(struct mid_q_entry *mid)
} else if (rdata->got_bytes > 0) {
__set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
}
+ if (rdata->got_bytes)
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
}
rdata->credits.value = 0;
+ rdata->subreq.error = rdata->result;
rdata->subreq.transferred += rdata->got_bytes;
- INIT_WORK(&rdata->subreq.work, cifs_readv_worker);
queue_work(cifsiod_wq, &rdata->subreq.work);
release_mid(mid);
add_credits(server, &credits, 0);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index a58a3333ecc3..27a1757a278e 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -227,7 +227,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
return;
failed:
- netfs_read_subreq_terminated(subreq, rc, false);
+ subreq->error = rc;
+ netfs_read_subreq_terminated(subreq);
}
/*
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 87cb1872db28..7121d9e0f404 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4388,7 +4388,7 @@ static struct folio_queue *cifs_alloc_folioq_buffer(ssize_t size)
p = kmalloc(sizeof(*p), GFP_NOFS);
if (!p)
goto nomem;
- folioq_init(p);
+ folioq_init(p, 0);
if (tail) {
tail->next = p;
p->prev = tail;
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 458b53d1f9cb..81c76e1b7209 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4500,14 +4500,6 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
return rc;
}
-static void smb2_readv_worker(struct work_struct *work)
-{
- struct cifs_io_subrequest *rdata =
- container_of(work, struct cifs_io_subrequest, subreq.work);
-
- netfs_read_subreq_terminated(&rdata->subreq, rdata->result, false);
-}
-
static void
smb2_readv_callback(struct mid_q_entry *mid)
{
@@ -4615,16 +4607,17 @@ smb2_readv_callback(struct mid_q_entry *mid)
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0;
}
- __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
+ if (rdata->got_bytes)
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
}
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value,
server->credits, server->in_flight,
0, cifs_trace_rw_credits_read_response_clear);
rdata->credits.value = 0;
+ rdata->subreq.error = rdata->result;
rdata->subreq.transferred += rdata->got_bytes;
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress);
- INIT_WORK(&rdata->subreq.work, smb2_readv_worker);
- queue_work(cifsiod_wq, &rdata->subreq.work);
+ netfs_read_subreq_terminated(&rdata->subreq);
release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
server->credits, server->in_flight,
diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h
index 3abe614ef5f0..4d3f8074c137 100644
--- a/include/linux/folio_queue.h
+++ b/include/linux/folio_queue.h
@@ -37,16 +37,20 @@ struct folio_queue {
#if PAGEVEC_SIZE > BITS_PER_LONG
#error marks is not big enough
#endif
+ unsigned int rreq_id;
+ unsigned int debug_id;
};
/**
* folioq_init - Initialise a folio queue segment
* @folioq: The segment to initialise
+ * @rreq_id: The request identifier to use in tracelines.
*
- * Initialise a folio queue segment. Note that the folio pointers are
- * left uninitialised.
+ * Initialise a folio queue segment and set an identifier to be used in traces.
+ *
+ * Note that the folio pointers are left uninitialised.
*/
-static inline void folioq_init(struct folio_queue *folioq)
+static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id)
{
folio_batch_init(&folioq->vec);
folioq->next = NULL;
@@ -54,6 +58,8 @@ static inline void folioq_init(struct folio_queue *folioq)
folioq->marks = 0;
folioq->marks2 = 0;
folioq->marks3 = 0;
+ folioq->rreq_id = rreq_id;
+ folioq->debug_id = 0;
}
/**
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ecdd5ced16a8..071d05d81d38 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -18,9 +18,11 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
+#include <linux/rolling_buffer.h>
enum netfs_sreq_ref_trace;
typedef struct mempool_s mempool_t;
+struct folio_queue;
/**
* folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED]
@@ -71,6 +73,7 @@ struct netfs_inode {
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
#define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */
+#define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */
};
/*
@@ -178,9 +181,6 @@ struct netfs_io_subrequest {
unsigned long long start; /* Where to start the I/O */
size_t len; /* Size of the I/O */
size_t transferred; /* Amount of data transferred */
- size_t consumed; /* Amount of read data consumed */
- size_t prev_donated; /* Amount of data donated from previous subreq */
- size_t next_donated; /* Amount of data donated from next subreq */
refcount_t ref;
short error; /* 0 or error that occurred */
unsigned short debug_index; /* Index in list (for debugging output) */
@@ -188,9 +188,6 @@ struct netfs_io_subrequest {
u8 retry_count; /* The number of retries (0 on initial pass) */
enum netfs_io_source source; /* Where to read from/write to */
unsigned char stream_nr; /* I/O stream this belongs to */
- unsigned char curr_folioq_slot; /* Folio currently being read */
- unsigned char curr_folio_order; /* Order of folio */
- struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */
unsigned long flags;
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
@@ -208,9 +205,11 @@ enum netfs_io_origin {
NETFS_READAHEAD, /* This read was triggered by readahead */
NETFS_READPAGE, /* This read is a synchronous read */
NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
+ NETFS_READ_SINGLE, /* This read should be treated as a single object */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
NETFS_DIO_READ, /* This is a direct I/O read */
NETFS_WRITEBACK, /* This write was triggered by writepages */
+ NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */
NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
NETFS_DIO_WRITE, /* This is a direct I/O write */
@@ -231,16 +230,16 @@ struct netfs_io_request {
struct address_space *mapping; /* The mapping being accessed */
struct kiocb *iocb; /* AIO completion vector */
struct netfs_cache_resources cache_resources;
+ struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */
struct readahead_control *ractl; /* Readahead descriptor */
struct list_head proc_link; /* Link in netfs_iorequests */
- struct list_head subrequests; /* Contributory I/O operations */
struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */
#define NR_IO_STREAMS 2 //wreq->nr_io_streams
struct netfs_group *group; /* Writeback group being written back */
- struct folio_queue *buffer; /* Head of I/O buffer */
- struct folio_queue *buffer_tail; /* Tail of I/O buffer */
- struct iov_iter iter; /* Unencrypted-side iterator */
- struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */
+ struct rolling_buffer buffer; /* Unencrypted buffer */
+#define NETFS_ROLLBUF_PUT_MARK ROLLBUF_MARK_1
+#define NETFS_ROLLBUF_PAGECACHE_MARK ROLLBUF_MARK_2
+ wait_queue_head_t waitq; /* Processor waiter */
void *netfs_priv; /* Private data for the netfs */
void *netfs_priv2; /* Private data for the netfs */
struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
@@ -251,28 +250,28 @@ struct netfs_io_request {
atomic_t subreq_counter; /* Next subreq->debug_index */
unsigned int nr_group_rel; /* Number of refs to release on ->group */
spinlock_t lock; /* Lock for queuing subreqs */
- atomic_t nr_outstanding; /* Number of ops in progress */
unsigned long long submitted; /* Amount submitted for I/O so far */
unsigned long long len; /* Length of the request */
size_t transferred; /* Amount to be indicated as transferred */
long error; /* 0 or error that occurred */
enum netfs_io_origin origin; /* Origin of the request */
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
- u8 buffer_head_slot; /* First slot in ->buffer */
- u8 buffer_tail_slot; /* Next slot in ->buffer_tail */
unsigned long long i_size; /* Size of the file */
unsigned long long start; /* Start position */
atomic64_t issued_to; /* Write issuer folio cursor */
unsigned long long collected_to; /* Point we've collected to */
unsigned long long cleaned_to; /* Position we've cleaned folios to */
+ unsigned long long abandon_to; /* Position to abandon folios to */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
- size_t prev_donated; /* Fallback for subreq->prev_donated */
+ unsigned char front_folio_order; /* Order (size) of front folio */
refcount_t ref;
unsigned long flags;
+#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
#define NETFS_RREQ_FAILED 4 /* The request failed */
#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
+#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */
#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
#define NETFS_RREQ_BLOCKED 10 /* We blocked */
@@ -409,6 +408,13 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
struct netfs_group *netfs_group);
ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
+/* Single, monolithic object read/write API. */
+void netfs_single_mark_inode_dirty(struct inode *inode);
+ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter);
+int netfs_writeback_single(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct iov_iter *iter);
+
/* Address operations API */
struct readahead_control;
void netfs_readahead(struct readahead_control *);
@@ -428,10 +434,8 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp);
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
/* (Sub)request management API. */
-void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
- bool was_async);
-void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
- int error, bool was_async);
+void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq);
+void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq);
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what);
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
@@ -453,6 +457,18 @@ void netfs_end_io_write(struct inode *inode);
int netfs_start_io_direct(struct inode *inode);
void netfs_end_io_direct(struct inode *inode);
+/* Miscellaneous APIs. */
+struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp,
+ unsigned int trace /*enum netfs_folioq_trace*/);
+void netfs_folioq_free(struct folio_queue *folioq,
+ unsigned int trace /*enum netfs_folioq_trace*/);
+
+/* Buffer wrangling helpers API. */
+int netfs_alloc_folioq_buffer(struct address_space *mapping,
+ struct folio_queue **_buffer,
+ size_t *_cur_size, ssize_t size, gfp_t gfp);
+void netfs_free_folioq_buffer(struct folio_queue *fq);
+
/**
* netfs_inode - Get the netfs inode context from the inode
* @inode: The inode to query
diff --git a/include/linux/rolling_buffer.h b/include/linux/rolling_buffer.h
new file mode 100644
index 000000000000..ac15b1ffdd83
--- /dev/null
+++ b/include/linux/rolling_buffer.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Rolling buffer of folios
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _ROLLING_BUFFER_H
+#define _ROLLING_BUFFER_H
+
+#include <linux/folio_queue.h>
+#include <linux/uio.h>
+
+/*
+ * Rolling buffer. Whilst the buffer is live and in use, folios and folio
+ * queue segments can be added to one end by one thread and removed from the
+ * other end by another thread. The buffer isn't allowed to be empty; it must
+ * always have at least one folio_queue in it so that neither side has to
+ * modify both queue pointers.
+ *
+ * The iterator in the buffer is extended as buffers are inserted. It can be
+ * snapshotted to use a segment of the buffer.
+ */
+struct rolling_buffer {
+ struct folio_queue *head; /* Producer's insertion point */
+ struct folio_queue *tail; /* Consumer's removal point */
+ struct iov_iter iter; /* Iterator tracking what's left in the buffer */
+ u8 next_head_slot; /* Next slot in ->head */
+ u8 first_tail_slot; /* First slot in ->tail */
+};
+
+/*
+ * Snapshot of a rolling buffer.
+ */
+struct rolling_buffer_snapshot {
+ struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */
+ unsigned char curr_slot; /* Folio currently being read */
+ unsigned char curr_order; /* Order of folio */
+};
+
+/* Marks to store per-folio in the internal folio_queue structs. */
+#define ROLLBUF_MARK_1 BIT(0)
+#define ROLLBUF_MARK_2 BIT(1)
+
+int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id,
+ unsigned int direction);
+int rolling_buffer_make_space(struct rolling_buffer *roll);
+ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll,
+ struct readahead_control *ractl,
+ struct folio_batch *put_batch);
+ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio,
+ unsigned int flags);
+struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll);
+void rolling_buffer_clear(struct rolling_buffer *roll);
+
+static inline void rolling_buffer_advance(struct rolling_buffer *roll, size_t amount)
+{
+ iov_iter_advance(&roll->iter, amount);
+}
+
+#endif /* _ROLLING_BUFFER_H */
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index a0aed1a428a1..2e92487f3f34 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -118,6 +118,8 @@ enum yfs_cm_operation {
*/
#define afs_call_traces \
EM(afs_call_trace_alloc, "ALLOC") \
+ EM(afs_call_trace_async_abort, "ASYAB") \
+ EM(afs_call_trace_async_kill, "ASYKL") \
EM(afs_call_trace_free, "FREE ") \
EM(afs_call_trace_get, "GET ") \
EM(afs_call_trace_put, "PUT ") \
@@ -323,6 +325,44 @@ enum yfs_cm_operation {
EM(yfs_CB_TellMeAboutYourself, "YFSCB.TellMeAboutYourself") \
E_(yfs_CB_CallBack, "YFSCB.CallBack")
+#define afs_cb_promise_traces \
+ EM(afs_cb_promise_clear_cb_break, "CLEAR cb-break") \
+ EM(afs_cb_promise_clear_rmdir, "CLEAR rmdir") \
+ EM(afs_cb_promise_clear_rotate_server, "CLEAR rot-srv") \
+ EM(afs_cb_promise_clear_server_change, "CLEAR srv-chg") \
+ EM(afs_cb_promise_clear_vol_init_cb, "CLEAR vol-init-cb") \
+ EM(afs_cb_promise_set_apply_cb, "SET apply-cb") \
+ EM(afs_cb_promise_set_new_inode, "SET new-inode") \
+ E_(afs_cb_promise_set_new_symlink, "SET new-symlink")
+
+#define afs_vnode_invalid_traces \
+ EM(afs_vnode_invalid_trace_cb_ro_snapshot, "cb-ro-snapshot") \
+ EM(afs_vnode_invalid_trace_cb_scrub, "cb-scrub") \
+ EM(afs_vnode_invalid_trace_cb_v_break, "cb-v-break") \
+ EM(afs_vnode_invalid_trace_expired, "expired") \
+ EM(afs_vnode_invalid_trace_no_cb_promise, "no-cb-promise") \
+ EM(afs_vnode_invalid_trace_vol_expired, "vol-expired") \
+ EM(afs_vnode_invalid_trace_zap_data, "zap-data") \
+ E_(afs_vnode_valid_trace, "valid")
+
+#define afs_dir_invalid_traces \
+ EM(afs_dir_invalid_edit_add_bad_size, "edit-add-bad-size") \
+ EM(afs_dir_invalid_edit_add_no_slots, "edit-add-no-slots") \
+ EM(afs_dir_invalid_edit_add_too_many_blocks, "edit-add-too-many-blocks") \
+ EM(afs_dir_invalid_edit_get_block, "edit-get-block") \
+ EM(afs_dir_invalid_edit_mkdir, "edit-mkdir") \
+ EM(afs_dir_invalid_edit_rem_bad_size, "edit-rem-bad-size") \
+ EM(afs_dir_invalid_edit_rem_wrong_name, "edit-rem-wrong_name") \
+ EM(afs_dir_invalid_edit_upd_bad_size, "edit-upd-bad-size") \
+ EM(afs_dir_invalid_edit_upd_no_dd, "edit-upd-no-dotdot") \
+ EM(afs_dir_invalid_dv_mismatch, "dv-mismatch") \
+ EM(afs_dir_invalid_inval_folio, "inv-folio") \
+ EM(afs_dir_invalid_iter_stale, "iter-stale") \
+ EM(afs_dir_invalid_reclaimed_folio, "reclaimed-folio") \
+ EM(afs_dir_invalid_release_folio, "rel-folio") \
+ EM(afs_dir_invalid_remote, "remote") \
+ E_(afs_dir_invalid_subdir_removed, "subdir-removed")
+
#define afs_edit_dir_ops \
EM(afs_edit_dir_create, "create") \
EM(afs_edit_dir_create_error, "c_fail") \
@@ -332,6 +372,7 @@ enum yfs_cm_operation {
EM(afs_edit_dir_delete_error, "d_err ") \
EM(afs_edit_dir_delete_inval, "d_invl") \
EM(afs_edit_dir_delete_noent, "d_nent") \
+ EM(afs_edit_dir_mkdir, "mk_ent") \
EM(afs_edit_dir_update_dd, "u_ddot") \
EM(afs_edit_dir_update_error, "u_fail") \
EM(afs_edit_dir_update_inval, "u_invl") \
@@ -385,6 +426,7 @@ enum yfs_cm_operation {
EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \
EM(afs_file_error_dir_small, "DIR_SMALL") \
EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \
+ EM(afs_file_error_symlink_big, "SYM_BIG") \
EM(afs_file_error_mntpt, "MNTPT_READ_FAILED") \
E_(afs_file_error_writeback_fail, "WRITEBACK_FAILED")
@@ -487,7 +529,9 @@ enum yfs_cm_operation {
enum afs_alist_trace { afs_alist_traces } __mode(byte);
enum afs_call_trace { afs_call_traces } __mode(byte);
enum afs_cb_break_reason { afs_cb_break_reasons } __mode(byte);
+enum afs_cb_promise_trace { afs_cb_promise_traces } __mode(byte);
enum afs_cell_trace { afs_cell_traces } __mode(byte);
+enum afs_dir_invalid_trace { afs_dir_invalid_traces} __mode(byte);
enum afs_edit_dir_op { afs_edit_dir_ops } __mode(byte);
enum afs_edit_dir_reason { afs_edit_dir_reasons } __mode(byte);
enum afs_eproto_cause { afs_eproto_causes } __mode(byte);
@@ -498,6 +542,7 @@ enum afs_flock_operation { afs_flock_operations } __mode(byte);
enum afs_io_error { afs_io_errors } __mode(byte);
enum afs_rotate_trace { afs_rotate_traces } __mode(byte);
enum afs_server_trace { afs_server_traces } __mode(byte);
+enum afs_vnode_invalid_trace { afs_vnode_invalid_traces} __mode(byte);
enum afs_volume_trace { afs_volume_traces } __mode(byte);
#endif /* end __AFS_GENERATE_TRACE_ENUMS_ONCE_ONLY */
@@ -513,8 +558,10 @@ enum afs_volume_trace { afs_volume_traces } __mode(byte);
afs_alist_traces;
afs_call_traces;
afs_cb_break_reasons;
+afs_cb_promise_traces;
afs_cell_traces;
afs_cm_operations;
+afs_dir_invalid_traces;
afs_edit_dir_ops;
afs_edit_dir_reasons;
afs_eproto_causes;
@@ -526,6 +573,7 @@ afs_fs_operations;
afs_io_errors;
afs_rotate_traces;
afs_server_traces;
+afs_vnode_invalid_traces;
afs_vl_operations;
yfs_cm_operations;
@@ -670,7 +718,7 @@ TRACE_EVENT(afs_make_fs_call,
}
),
- TP_printk("c=%08x %06llx:%06llx:%06x %s",
+ TP_printk("c=%08x V=%llx i=%llx:%x %s",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -704,7 +752,7 @@ TRACE_EVENT(afs_make_fs_calli,
}
),
- TP_printk("c=%08x %06llx:%06llx:%06x %s i=%u",
+ TP_printk("c=%08x V=%llx i=%llx:%x %s i=%u",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -741,7 +789,7 @@ TRACE_EVENT(afs_make_fs_call1,
__entry->name[__len] = 0;
),
- TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\"",
+ TP_printk("c=%08x V=%llx i=%llx:%x %s \"%s\"",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -782,7 +830,7 @@ TRACE_EVENT(afs_make_fs_call2,
__entry->name2[__len2] = 0;
),
- TP_printk("c=%08x %06llx:%06llx:%06x %s \"%s\" \"%s\"",
+ TP_printk("c=%08x V=%llx i=%llx:%x %s \"%s\" \"%s\"",
__entry->call,
__entry->fid.vid,
__entry->fid.vnode,
@@ -887,9 +935,9 @@ TRACE_EVENT(afs_sent_data,
);
TRACE_EVENT(afs_dir_check_failed,
- TP_PROTO(struct afs_vnode *vnode, loff_t off, loff_t i_size),
+ TP_PROTO(struct afs_vnode *vnode, loff_t off),
- TP_ARGS(vnode, off, i_size),
+ TP_ARGS(vnode, off),
TP_STRUCT__entry(
__field(struct afs_vnode *, vnode)
@@ -900,7 +948,7 @@ TRACE_EVENT(afs_dir_check_failed,
TP_fast_assign(
__entry->vnode = vnode;
__entry->off = off;
- __entry->i_size = i_size;
+ __entry->i_size = i_size_read(&vnode->netfs.inode);
),
TP_printk("vn=%p %llx/%llx",
@@ -1002,7 +1050,7 @@ TRACE_EVENT(afs_edit_dir,
__entry->name[__len] = 0;
),
- TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x \"%s\"",
+ TP_printk("di=%x:%x %s %s %u[%u] fi=%x:%x \"%s\"",
__entry->vnode, __entry->unique,
__print_symbolic(__entry->why, afs_edit_dir_reasons),
__print_symbolic(__entry->op, afs_edit_dir_ops),
@@ -1011,6 +1059,122 @@ TRACE_EVENT(afs_edit_dir,
__entry->name)
);
+TRACE_EVENT(afs_dir_invalid,
+ TP_PROTO(const struct afs_vnode *dvnode, enum afs_dir_invalid_trace trace),
+
+ TP_ARGS(dvnode, trace),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, vnode)
+ __field(unsigned int, unique)
+ __field(enum afs_dir_invalid_trace, trace)
+ ),
+
+ TP_fast_assign(
+ __entry->vnode = dvnode->fid.vnode;
+ __entry->unique = dvnode->fid.unique;
+ __entry->trace = trace;
+ ),
+
+ TP_printk("di=%x:%x %s",
+ __entry->vnode, __entry->unique,
+ __print_symbolic(__entry->trace, afs_dir_invalid_traces))
+ );
+
+TRACE_EVENT(afs_cb_promise,
+ TP_PROTO(const struct afs_vnode *vnode, enum afs_cb_promise_trace trace),
+
+ TP_ARGS(vnode, trace),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, vnode)
+ __field(unsigned int, unique)
+ __field(enum afs_cb_promise_trace, trace)
+ ),
+
+ TP_fast_assign(
+ __entry->vnode = vnode->fid.vnode;
+ __entry->unique = vnode->fid.unique;
+ __entry->trace = trace;
+ ),
+
+ TP_printk("di=%x:%x %s",
+ __entry->vnode, __entry->unique,
+ __print_symbolic(__entry->trace, afs_cb_promise_traces))
+ );
+
+TRACE_EVENT(afs_vnode_invalid,
+ TP_PROTO(const struct afs_vnode *vnode, enum afs_vnode_invalid_trace trace),
+
+ TP_ARGS(vnode, trace),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, vnode)
+ __field(unsigned int, unique)
+ __field(enum afs_vnode_invalid_trace, trace)
+ ),
+
+ TP_fast_assign(
+ __entry->vnode = vnode->fid.vnode;
+ __entry->unique = vnode->fid.unique;
+ __entry->trace = trace;
+ ),
+
+ TP_printk("di=%x:%x %s",
+ __entry->vnode, __entry->unique,
+ __print_symbolic(__entry->trace, afs_vnode_invalid_traces))
+ );
+
+TRACE_EVENT(afs_set_dv,
+ TP_PROTO(const struct afs_vnode *dvnode, u64 new_dv),
+
+ TP_ARGS(dvnode, new_dv),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, vnode)
+ __field(unsigned int, unique)
+ __field(u64, old_dv)
+ __field(u64, new_dv)
+ ),
+
+ TP_fast_assign(
+ __entry->vnode = dvnode->fid.vnode;
+ __entry->unique = dvnode->fid.unique;
+ __entry->old_dv = dvnode->status.data_version;
+ __entry->new_dv = new_dv;
+ ),
+
+ TP_printk("di=%x:%x dv=%llx -> dv=%llx",
+ __entry->vnode, __entry->unique,
+ __entry->old_dv, __entry->new_dv)
+ );
+
+TRACE_EVENT(afs_dv_mismatch,
+ TP_PROTO(const struct afs_vnode *dvnode, u64 before_dv, int delta, u64 new_dv),
+
+ TP_ARGS(dvnode, before_dv, delta, new_dv),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, vnode)
+ __field(unsigned int, unique)
+ __field(int, delta)
+ __field(u64, before_dv)
+ __field(u64, new_dv)
+ ),
+
+ TP_fast_assign(
+ __entry->vnode = dvnode->fid.vnode;
+ __entry->unique = dvnode->fid.unique;
+ __entry->delta = delta;
+ __entry->before_dv = before_dv;
+ __entry->new_dv = new_dv;
+ ),
+
+ TP_printk("di=%x:%x xdv=%llx+%d dv=%llx",
+ __entry->vnode, __entry->unique,
+ __entry->before_dv, __entry->delta, __entry->new_dv)
+ );
+
TRACE_EVENT(afs_protocol_error,
TP_PROTO(struct afs_call *call, enum afs_eproto_cause cause),
@@ -1611,6 +1775,36 @@ TRACE_EVENT(afs_make_call,
__entry->fid.unique)
);
+TRACE_EVENT(afs_read_recv,
+ TP_PROTO(const struct afs_operation *op, const struct afs_call *call),
+
+ TP_ARGS(op, call),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, rreq)
+ __field(unsigned int, sreq)
+ __field(unsigned int, op)
+ __field(unsigned int, op_flags)
+ __field(unsigned int, call)
+ __field(enum afs_call_state, call_state)
+ ),
+
+ TP_fast_assign(
+ __entry->op = op->debug_id;
+ __entry->sreq = op->fetch.subreq->debug_index;
+ __entry->rreq = op->fetch.subreq->rreq->debug_id;
+ __entry->op_flags = op->flags;
+ __entry->call = call->debug_id;
+ __entry->call_state = call->state;
+ ),
+
+ TP_printk("R=%08x[%x] OP=%08x c=%08x cs=%x of=%x",
+ __entry->rreq, __entry->sreq,
+ __entry->op,
+ __entry->call, __entry->call_state,
+ __entry->op_flags)
+ );
+
#endif /* _TRACE_AFS_H */
/* This part must be outside protection */
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 7d931db02b93..a743b2a35ea7 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -223,10 +223,10 @@ TRACE_EVENT(cachefiles_ref,
/* Note that obj may be NULL */
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, cookie )
- __field(enum cachefiles_obj_ref_trace, why )
- __field(int, usage )
+ __field(unsigned int, obj)
+ __field(unsigned int, cookie)
+ __field(enum cachefiles_obj_ref_trace, why)
+ __field(int, usage)
),
TP_fast_assign(
@@ -249,10 +249,10 @@ TRACE_EVENT(cachefiles_lookup,
TP_ARGS(obj, dir, de),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(short, error )
- __field(unsigned long, dino )
- __field(unsigned long, ino )
+ __field(unsigned int, obj)
+ __field(short, error)
+ __field(unsigned long, dino)
+ __field(unsigned long, ino)
),
TP_fast_assign(
@@ -273,8 +273,8 @@ TRACE_EVENT(cachefiles_mkdir,
TP_ARGS(dir, subdir),
TP_STRUCT__entry(
- __field(unsigned int, dir )
- __field(unsigned int, subdir )
+ __field(unsigned int, dir)
+ __field(unsigned int, subdir)
),
TP_fast_assign(
@@ -293,8 +293,8 @@ TRACE_EVENT(cachefiles_tmpfile,
TP_ARGS(obj, backer),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, backer )
+ __field(unsigned int, obj)
+ __field(unsigned int, backer)
),
TP_fast_assign(
@@ -313,8 +313,8 @@ TRACE_EVENT(cachefiles_link,
TP_ARGS(obj, backer),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, backer )
+ __field(unsigned int, obj)
+ __field(unsigned int, backer)
),
TP_fast_assign(
@@ -336,9 +336,9 @@ TRACE_EVENT(cachefiles_unlink,
/* Note that obj may be NULL */
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, ino )
- __field(enum fscache_why_object_killed, why )
+ __field(unsigned int, obj)
+ __field(unsigned int, ino)
+ __field(enum fscache_why_object_killed, why)
),
TP_fast_assign(
@@ -361,9 +361,9 @@ TRACE_EVENT(cachefiles_rename,
/* Note that obj may be NULL */
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, ino )
- __field(enum fscache_why_object_killed, why )
+ __field(unsigned int, obj)
+ __field(unsigned int, ino)
+ __field(enum fscache_why_object_killed, why)
),
TP_fast_assign(
@@ -380,17 +380,20 @@ TRACE_EVENT(cachefiles_rename,
TRACE_EVENT(cachefiles_coherency,
TP_PROTO(struct cachefiles_object *obj,
ino_t ino,
+ u64 disk_aux,
enum cachefiles_content content,
enum cachefiles_coherency_trace why),
- TP_ARGS(obj, ino, content, why),
+ TP_ARGS(obj, ino, disk_aux, content, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(enum cachefiles_coherency_trace, why )
- __field(enum cachefiles_content, content )
- __field(u64, ino )
+ __field(unsigned int, obj)
+ __field(enum cachefiles_coherency_trace, why)
+ __field(enum cachefiles_content, content)
+ __field(u64, ino)
+ __field(u64, aux)
+ __field(u64, disk_aux)
),
TP_fast_assign(
@@ -398,13 +401,17 @@ TRACE_EVENT(cachefiles_coherency,
__entry->why = why;
__entry->content = content;
__entry->ino = ino;
+ __entry->aux = be64_to_cpup((__be64 *)obj->cookie->inline_aux);
+ __entry->disk_aux = disk_aux;
),
- TP_printk("o=%08x %s B=%llx c=%u",
+ TP_printk("o=%08x %s B=%llx c=%u aux=%llx dsk=%llx",
__entry->obj,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino,
- __entry->content)
+ __entry->content,
+ __entry->aux,
+ __entry->disk_aux)
);
TRACE_EVENT(cachefiles_vol_coherency,
@@ -416,9 +423,9 @@ TRACE_EVENT(cachefiles_vol_coherency,
/* Note that obj may be NULL */
TP_STRUCT__entry(
- __field(unsigned int, vol )
- __field(enum cachefiles_coherency_trace, why )
- __field(u64, ino )
+ __field(unsigned int, vol)
+ __field(enum cachefiles_coherency_trace, why)
+ __field(u64, ino)
),
TP_fast_assign(
@@ -445,14 +452,14 @@ TRACE_EVENT(cachefiles_prep_read,
TP_ARGS(obj, start, len, flags, source, why, cache_inode, netfs_inode),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned short, flags )
- __field(enum netfs_io_source, source )
- __field(enum cachefiles_prepare_read_trace, why )
- __field(size_t, len )
- __field(loff_t, start )
- __field(unsigned int, netfs_inode )
- __field(unsigned int, cache_inode )
+ __field(unsigned int, obj)
+ __field(unsigned short, flags)
+ __field(enum netfs_io_source, source)
+ __field(enum cachefiles_prepare_read_trace, why)
+ __field(size_t, len)
+ __field(loff_t, start)
+ __field(unsigned int, netfs_inode)
+ __field(unsigned int, cache_inode)
),
TP_fast_assign(
@@ -484,10 +491,10 @@ TRACE_EVENT(cachefiles_read,
TP_ARGS(obj, backer, start, len),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, backer )
- __field(size_t, len )
- __field(loff_t, start )
+ __field(unsigned int, obj)
+ __field(unsigned int, backer)
+ __field(size_t, len)
+ __field(loff_t, start)
),
TP_fast_assign(
@@ -513,10 +520,10 @@ TRACE_EVENT(cachefiles_write,
TP_ARGS(obj, backer, start, len),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, backer )
- __field(size_t, len )
- __field(loff_t, start )
+ __field(unsigned int, obj)
+ __field(unsigned int, backer)
+ __field(size_t, len)
+ __field(loff_t, start)
),
TP_fast_assign(
@@ -540,11 +547,11 @@ TRACE_EVENT(cachefiles_trunc,
TP_ARGS(obj, backer, from, to, why),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, backer )
- __field(enum cachefiles_trunc_trace, why )
- __field(loff_t, from )
- __field(loff_t, to )
+ __field(unsigned int, obj)
+ __field(unsigned int, backer)
+ __field(enum cachefiles_trunc_trace, why)
+ __field(loff_t, from)
+ __field(loff_t, to)
),
TP_fast_assign(
@@ -571,8 +578,8 @@ TRACE_EVENT(cachefiles_mark_active,
/* Note that obj may be NULL */
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(ino_t, inode )
+ __field(unsigned int, obj)
+ __field(ino_t, inode)
),
TP_fast_assign(
@@ -592,8 +599,8 @@ TRACE_EVENT(cachefiles_mark_failed,
/* Note that obj may be NULL */
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(ino_t, inode )
+ __field(unsigned int, obj)
+ __field(ino_t, inode)
),
TP_fast_assign(
@@ -613,8 +620,8 @@ TRACE_EVENT(cachefiles_mark_inactive,
/* Note that obj may be NULL */
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(ino_t, inode )
+ __field(unsigned int, obj)
+ __field(ino_t, inode)
),
TP_fast_assign(
@@ -633,10 +640,10 @@ TRACE_EVENT(cachefiles_vfs_error,
TP_ARGS(obj, backer, error, where),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, backer )
- __field(enum cachefiles_error_trace, where )
- __field(short, error )
+ __field(unsigned int, obj)
+ __field(unsigned int, backer)
+ __field(enum cachefiles_error_trace, where)
+ __field(short, error)
),
TP_fast_assign(
@@ -660,10 +667,10 @@ TRACE_EVENT(cachefiles_io_error,
TP_ARGS(obj, backer, error, where),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, backer )
- __field(enum cachefiles_error_trace, where )
- __field(short, error )
+ __field(unsigned int, obj)
+ __field(unsigned int, backer)
+ __field(enum cachefiles_error_trace, where)
+ __field(short, error)
),
TP_fast_assign(
@@ -687,11 +694,11 @@ TRACE_EVENT(cachefiles_ondemand_open,
TP_ARGS(obj, msg, load),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, msg_id )
- __field(unsigned int, object_id )
- __field(unsigned int, fd )
- __field(unsigned int, flags )
+ __field(unsigned int, obj)
+ __field(unsigned int, msg_id)
+ __field(unsigned int, object_id)
+ __field(unsigned int, fd)
+ __field(unsigned int, flags)
),
TP_fast_assign(
@@ -717,9 +724,9 @@ TRACE_EVENT(cachefiles_ondemand_copen,
TP_ARGS(obj, msg_id, len),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, msg_id )
- __field(long, len )
+ __field(unsigned int, obj)
+ __field(unsigned int, msg_id)
+ __field(long, len)
),
TP_fast_assign(
@@ -740,9 +747,9 @@ TRACE_EVENT(cachefiles_ondemand_close,
TP_ARGS(obj, msg),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, msg_id )
- __field(unsigned int, object_id )
+ __field(unsigned int, obj)
+ __field(unsigned int, msg_id)
+ __field(unsigned int, object_id)
),
TP_fast_assign(
@@ -764,11 +771,11 @@ TRACE_EVENT(cachefiles_ondemand_read,
TP_ARGS(obj, msg, load),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, msg_id )
- __field(unsigned int, object_id )
- __field(loff_t, start )
- __field(size_t, len )
+ __field(unsigned int, obj)
+ __field(unsigned int, msg_id)
+ __field(unsigned int, object_id)
+ __field(loff_t, start)
+ __field(size_t, len)
),
TP_fast_assign(
@@ -793,8 +800,8 @@ TRACE_EVENT(cachefiles_ondemand_cread,
TP_ARGS(obj, msg_id),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, msg_id )
+ __field(unsigned int, obj)
+ __field(unsigned int, msg_id)
),
TP_fast_assign(
@@ -814,10 +821,10 @@ TRACE_EVENT(cachefiles_ondemand_fd_write,
TP_ARGS(obj, backer, start, len),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, backer )
- __field(loff_t, start )
- __field(size_t, len )
+ __field(unsigned int, obj)
+ __field(unsigned int, backer)
+ __field(loff_t, start)
+ __field(size_t, len)
),
TP_fast_assign(
@@ -840,8 +847,8 @@ TRACE_EVENT(cachefiles_ondemand_fd_release,
TP_ARGS(obj, object_id),
TP_STRUCT__entry(
- __field(unsigned int, obj )
- __field(unsigned int, object_id )
+ __field(unsigned int, obj)
+ __field(unsigned int, object_id)
),
TP_fast_assign(
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index bf511bca896e..6e699cadcb29 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -21,6 +21,7 @@
EM(netfs_read_trace_readahead, "READAHEAD") \
EM(netfs_read_trace_readpage, "READPAGE ") \
EM(netfs_read_trace_read_gaps, "READ-GAPS") \
+ EM(netfs_read_trace_read_single, "READ-SNGL") \
EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \
E_(netfs_read_trace_write_begin, "WRITEBEGN")
@@ -35,9 +36,11 @@
EM(NETFS_READAHEAD, "RA") \
EM(NETFS_READPAGE, "RP") \
EM(NETFS_READ_GAPS, "RG") \
+ EM(NETFS_READ_SINGLE, "R1") \
EM(NETFS_READ_FOR_WRITE, "RW") \
EM(NETFS_DIO_READ, "DR") \
EM(NETFS_WRITEBACK, "WB") \
+ EM(NETFS_WRITEBACK_SINGLE, "W1") \
EM(NETFS_WRITETHROUGH, "WT") \
EM(NETFS_UNBUFFERED_WRITE, "UW") \
EM(NETFS_DIO_WRITE, "DW") \
@@ -47,17 +50,23 @@
EM(netfs_rreq_trace_assess, "ASSESS ") \
EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_collect, "COLLECT") \
+ EM(netfs_rreq_trace_complete, "COMPLET") \
+ EM(netfs_rreq_trace_dirty, "DIRTY ") \
EM(netfs_rreq_trace_done, "DONE ") \
EM(netfs_rreq_trace_free, "FREE ") \
EM(netfs_rreq_trace_redirty, "REDIRTY") \
EM(netfs_rreq_trace_resubmit, "RESUBMT") \
+ EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \
EM(netfs_rreq_trace_set_pause, "PAUSE ") \
EM(netfs_rreq_trace_unlock, "UNLOCK ") \
EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \
EM(netfs_rreq_trace_unmark, "UNMARK ") \
EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \
+ EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \
EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
+ EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \
+ EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \
EM(netfs_rreq_trace_unpause, "UNPAUSE") \
E_(netfs_rreq_trace_write_done, "WR-DONE")
@@ -74,6 +83,10 @@
#define netfs_sreq_traces \
EM(netfs_sreq_trace_add_donations, "+DON ") \
EM(netfs_sreq_trace_added, "ADD ") \
+ EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \
+ EM(netfs_sreq_trace_cache_prepare, "CA-PR") \
+ EM(netfs_sreq_trace_cache_write, "CA-WR") \
+ EM(netfs_sreq_trace_cancel, "CANCL") \
EM(netfs_sreq_trace_clear, "CLEAR") \
EM(netfs_sreq_trace_discard, "DSCRD") \
EM(netfs_sreq_trace_donate_to_prev, "DON-P") \
@@ -84,6 +97,9 @@
EM(netfs_sreq_trace_hit_eof, "EOF ") \
EM(netfs_sreq_trace_io_progress, "IO ") \
EM(netfs_sreq_trace_limited, "LIMIT") \
+ EM(netfs_sreq_trace_need_clear, "N-CLR") \
+ EM(netfs_sreq_trace_partial_read, "PARTR") \
+ EM(netfs_sreq_trace_need_retry, "NRTRY") \
EM(netfs_sreq_trace_prepare, "PREP ") \
EM(netfs_sreq_trace_prep_failed, "PRPFL") \
EM(netfs_sreq_trace_progress, "PRGRS") \
@@ -129,6 +145,7 @@
EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \
EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \
EM(netfs_sreq_trace_new, "NEW ") \
+ EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \
EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \
EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \
EM(netfs_sreq_trace_put_consumed, "PUT CONSUME") \
@@ -152,6 +169,7 @@
EM(netfs_streaming_filled_page, "mod-streamw-f") \
EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \
EM(netfs_folio_trace_abandon, "abandon") \
+ EM(netfs_folio_trace_alloc_buffer, "alloc-buf") \
EM(netfs_folio_trace_cancel_copy, "cancel-copy") \
EM(netfs_folio_trace_cancel_store, "cancel-store") \
EM(netfs_folio_trace_clear, "clear") \
@@ -168,6 +186,7 @@
EM(netfs_folio_trace_mkwrite, "mkwrite") \
EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \
EM(netfs_folio_trace_not_under_wback, "!wback") \
+ EM(netfs_folio_trace_not_locked, "!locked") \
EM(netfs_folio_trace_put, "put") \
EM(netfs_folio_trace_read, "read") \
EM(netfs_folio_trace_read_done, "read-done") \
@@ -191,6 +210,14 @@
EM(netfs_trace_donate_to_next, "to-next") \
E_(netfs_trace_donate_to_deferred_next, "defer-next")
+#define netfs_folioq_traces \
+ EM(netfs_trace_folioq_alloc_buffer, "alloc-buf") \
+ EM(netfs_trace_folioq_clear, "clear") \
+ EM(netfs_trace_folioq_delete, "delete") \
+ EM(netfs_trace_folioq_make_space, "make-space") \
+ EM(netfs_trace_folioq_rollbuf_init, "roll-init") \
+ E_(netfs_trace_folioq_read_progress, "r-progress")
+
#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
@@ -209,6 +236,7 @@ enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
enum netfs_folio_trace { netfs_folio_traces } __mode(byte);
enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte);
enum netfs_donate_trace { netfs_donate_traces } __mode(byte);
+enum netfs_folioq_trace { netfs_folioq_traces } __mode(byte);
#endif
@@ -232,6 +260,7 @@ netfs_sreq_ref_traces;
netfs_folio_traces;
netfs_collect_contig_traces;
netfs_donate_traces;
+netfs_folioq_traces;
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -250,13 +279,13 @@ TRACE_EVENT(netfs_read,
TP_ARGS(rreq, start, len, what),
TP_STRUCT__entry(
- __field(unsigned int, rreq )
- __field(unsigned int, cookie )
- __field(loff_t, i_size )
- __field(loff_t, start )
- __field(size_t, len )
- __field(enum netfs_read_trace, what )
- __field(unsigned int, netfs_inode )
+ __field(unsigned int, rreq)
+ __field(unsigned int, cookie)
+ __field(loff_t, i_size)
+ __field(loff_t, start)
+ __field(size_t, len)
+ __field(enum netfs_read_trace, what)
+ __field(unsigned int, netfs_inode)
),
TP_fast_assign(
@@ -284,10 +313,10 @@ TRACE_EVENT(netfs_rreq,
TP_ARGS(rreq, what),
TP_STRUCT__entry(
- __field(unsigned int, rreq )
- __field(unsigned int, flags )
- __field(enum netfs_io_origin, origin )
- __field(enum netfs_rreq_trace, what )
+ __field(unsigned int, rreq)
+ __field(unsigned int, flags)
+ __field(enum netfs_io_origin, origin)
+ __field(enum netfs_rreq_trace, what)
),
TP_fast_assign(
@@ -311,15 +340,16 @@ TRACE_EVENT(netfs_sreq,
TP_ARGS(sreq, what),
TP_STRUCT__entry(
- __field(unsigned int, rreq )
- __field(unsigned short, index )
- __field(short, error )
- __field(unsigned short, flags )
- __field(enum netfs_io_source, source )
- __field(enum netfs_sreq_trace, what )
- __field(size_t, len )
- __field(size_t, transferred )
- __field(loff_t, start )
+ __field(unsigned int, rreq)
+ __field(unsigned short, index)
+ __field(short, error)
+ __field(unsigned short, flags)
+ __field(enum netfs_io_source, source)
+ __field(enum netfs_sreq_trace, what)
+ __field(u8, slot)
+ __field(size_t, len)
+ __field(size_t, transferred)
+ __field(loff_t, start)
),
TP_fast_assign(
@@ -332,15 +362,16 @@ TRACE_EVENT(netfs_sreq,
__entry->len = sreq->len;
__entry->transferred = sreq->transferred;
__entry->start = sreq->start;
+ __entry->slot = sreq->io_iter.folioq_slot;
),
- TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx e=%d",
+ TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->what, netfs_sreq_traces),
__entry->flags,
__entry->start, __entry->transferred, __entry->len,
- __entry->error)
+ __entry->slot, __entry->error)
);
TRACE_EVENT(netfs_failure,
@@ -351,15 +382,15 @@ TRACE_EVENT(netfs_failure,
TP_ARGS(rreq, sreq, error, what),
TP_STRUCT__entry(
- __field(unsigned int, rreq )
- __field(short, index )
- __field(short, error )
- __field(unsigned short, flags )
- __field(enum netfs_io_source, source )
- __field(enum netfs_failure, what )
- __field(size_t, len )
- __field(size_t, transferred )
- __field(loff_t, start )
+ __field(unsigned int, rreq)
+ __field(short, index)
+ __field(short, error)
+ __field(unsigned short, flags)
+ __field(enum netfs_io_source, source)
+ __field(enum netfs_failure, what)
+ __field(size_t, len)
+ __field(size_t, transferred)
+ __field(loff_t, start)
),
TP_fast_assign(
@@ -390,9 +421,9 @@ TRACE_EVENT(netfs_rreq_ref,
TP_ARGS(rreq_debug_id, ref, what),
TP_STRUCT__entry(
- __field(unsigned int, rreq )
- __field(int, ref )
- __field(enum netfs_rreq_ref_trace, what )
+ __field(unsigned int, rreq)
+ __field(int, ref)
+ __field(enum netfs_rreq_ref_trace, what)
),
TP_fast_assign(
@@ -414,10 +445,10 @@ TRACE_EVENT(netfs_sreq_ref,
TP_ARGS(rreq_debug_id, subreq_debug_index, ref, what),
TP_STRUCT__entry(
- __field(unsigned int, rreq )
- __field(unsigned int, subreq )
- __field(int, ref )
- __field(enum netfs_sreq_ref_trace, what )
+ __field(unsigned int, rreq)
+ __field(unsigned int, subreq)
+ __field(int, ref)
+ __field(enum netfs_sreq_ref_trace, what)
),
TP_fast_assign(
@@ -465,10 +496,10 @@ TRACE_EVENT(netfs_write_iter,
TP_ARGS(iocb, from),
TP_STRUCT__entry(
- __field(unsigned long long, start )
- __field(size_t, len )
- __field(unsigned int, flags )
- __field(unsigned int, ino )
+ __field(unsigned long long, start)
+ __field(size_t, len)
+ __field(unsigned int, flags)
+ __field(unsigned int, ino)
),
TP_fast_assign(
@@ -489,12 +520,12 @@ TRACE_EVENT(netfs_write,
TP_ARGS(wreq, what),
TP_STRUCT__entry(
- __field(unsigned int, wreq )
- __field(unsigned int, cookie )
- __field(unsigned int, ino )
- __field(enum netfs_write_trace, what )
- __field(unsigned long long, start )
- __field(unsigned long long, len )
+ __field(unsigned int, wreq)
+ __field(unsigned int, cookie)
+ __field(unsigned int, ino)
+ __field(enum netfs_write_trace, what)
+ __field(unsigned long long, start)
+ __field(unsigned long long, len)
),
TP_fast_assign(
@@ -522,10 +553,10 @@ TRACE_EVENT(netfs_collect,
TP_ARGS(wreq),
TP_STRUCT__entry(
- __field(unsigned int, wreq )
- __field(unsigned int, len )
- __field(unsigned long long, transferred )
- __field(unsigned long long, start )
+ __field(unsigned int, wreq)
+ __field(unsigned int, len)
+ __field(unsigned long long, transferred)
+ __field(unsigned long long, start)
),
TP_fast_assign(
@@ -548,12 +579,12 @@ TRACE_EVENT(netfs_collect_sreq,
TP_ARGS(wreq, subreq),
TP_STRUCT__entry(
- __field(unsigned int, wreq )
- __field(unsigned int, subreq )
- __field(unsigned int, stream )
- __field(unsigned int, len )
- __field(unsigned int, transferred )
- __field(unsigned long long, start )
+ __field(unsigned int, wreq)
+ __field(unsigned int, subreq)
+ __field(unsigned int, stream)
+ __field(unsigned int, len)
+ __field(unsigned int, transferred)
+ __field(unsigned long long, start)
),
TP_fast_assign(
@@ -579,11 +610,11 @@ TRACE_EVENT(netfs_collect_folio,
TP_ARGS(wreq, folio, fend, collected_to),
TP_STRUCT__entry(
- __field(unsigned int, wreq )
- __field(unsigned long, index )
- __field(unsigned long long, fend )
- __field(unsigned long long, cleaned_to )
- __field(unsigned long long, collected_to )
+ __field(unsigned int, wreq)
+ __field(unsigned long, index)
+ __field(unsigned long long, fend)
+ __field(unsigned long long, cleaned_to)
+ __field(unsigned long long, collected_to)
),
TP_fast_assign(
@@ -608,10 +639,10 @@ TRACE_EVENT(netfs_collect_state,
TP_ARGS(wreq, collected_to, notes),
TP_STRUCT__entry(
- __field(unsigned int, wreq )
- __field(unsigned int, notes )
- __field(unsigned long long, collected_to )
- __field(unsigned long long, cleaned_to )
+ __field(unsigned int, wreq)
+ __field(unsigned int, notes)
+ __field(unsigned long long, collected_to)
+ __field(unsigned long long, cleaned_to)
),
TP_fast_assign(
@@ -680,69 +711,27 @@ TRACE_EVENT(netfs_collect_stream,
__entry->collected_to, __entry->front)
);
-TRACE_EVENT(netfs_progress,
- TP_PROTO(const struct netfs_io_subrequest *subreq,
- unsigned long long start, size_t avail, size_t part),
+TRACE_EVENT(netfs_folioq,
+ TP_PROTO(const struct folio_queue *fq,
+ enum netfs_folioq_trace trace),
- TP_ARGS(subreq, start, avail, part),
+ TP_ARGS(fq, trace),
TP_STRUCT__entry(
__field(unsigned int, rreq)
- __field(unsigned int, subreq)
- __field(unsigned int, consumed)
- __field(unsigned int, transferred)
- __field(unsigned long long, f_start)
- __field(unsigned int, f_avail)
- __field(unsigned int, f_part)
- __field(unsigned char, slot)
+ __field(unsigned int, id)
+ __field(enum netfs_folioq_trace, trace)
),
TP_fast_assign(
- __entry->rreq = subreq->rreq->debug_id;
- __entry->subreq = subreq->debug_index;
- __entry->consumed = subreq->consumed;
- __entry->transferred = subreq->transferred;
- __entry->f_start = start;
- __entry->f_avail = avail;
- __entry->f_part = part;
- __entry->slot = subreq->curr_folioq_slot;
- ),
-
- TP_printk("R=%08x[%02x] s=%llx ct=%x/%x pa=%x/%x sl=%x",
- __entry->rreq, __entry->subreq, __entry->f_start,
- __entry->consumed, __entry->transferred,
- __entry->f_part, __entry->f_avail, __entry->slot)
- );
-
-TRACE_EVENT(netfs_donate,
- TP_PROTO(const struct netfs_io_request *rreq,
- const struct netfs_io_subrequest *from,
- const struct netfs_io_subrequest *to,
- size_t amount,
- enum netfs_donate_trace trace),
-
- TP_ARGS(rreq, from, to, amount, trace),
-
- TP_STRUCT__entry(
- __field(unsigned int, rreq)
- __field(unsigned int, from)
- __field(unsigned int, to)
- __field(unsigned int, amount)
- __field(enum netfs_donate_trace, trace)
- ),
-
- TP_fast_assign(
- __entry->rreq = rreq->debug_id;
- __entry->from = from->debug_index;
- __entry->to = to ? to->debug_index : -1;
- __entry->amount = amount;
+ __entry->rreq = fq ? fq->rreq_id : 0;
+ __entry->id = fq ? fq->debug_id : 0;
__entry->trace = trace;
),
- TP_printk("R=%08x[%02x] -> [%02x] %s am=%x",
- __entry->rreq, __entry->from, __entry->to,
- __print_symbolic(__entry->trace, netfs_donate_traces),
- __entry->amount)
+ TP_printk("R=%08x fq=%x %s",
+ __entry->rreq, __entry->id,
+ __print_symbolic(__entry->trace, netfs_folioq_traces))
);
#undef EM
diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c
index 13e15687675a..10a560feb66e 100644
--- a/lib/kunit_iov_iter.c
+++ b/lib/kunit_iov_iter.c
@@ -392,7 +392,7 @@ static void __init iov_kunit_load_folioq(struct kunit *test,
if (folioq_full(p)) {
p->next = kzalloc(sizeof(struct folio_queue), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p->next);
- folioq_init(p->next);
+ folioq_init(p->next, 0);
p->next->prev = p;
p = p->next;
}
@@ -409,7 +409,7 @@ static struct folio_queue *iov_kunit_create_folioq(struct kunit *test)
folioq = kzalloc(sizeof(struct folio_queue), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, folioq);
kunit_add_action_or_reset(test, iov_kunit_destroy_folioq, folioq);
- folioq_init(folioq);
+ folioq_init(folioq, 0);
return folioq;
}