diff options
Diffstat (limited to 'fs/nfsd')
-rw-r--r-- | fs/nfsd/Kconfig | 15 | ||||
-rw-r--r-- | fs/nfsd/Makefile | 1 | ||||
-rw-r--r-- | fs/nfsd/debugfs.c | 47 | ||||
-rw-r--r-- | fs/nfsd/export.c | 3 | ||||
-rw-r--r-- | fs/nfsd/filecache.c | 122 | ||||
-rw-r--r-- | fs/nfsd/filecache.h | 7 | ||||
-rw-r--r-- | fs/nfsd/nfs3proc.c | 72 | ||||
-rw-r--r-- | fs/nfsd/nfs3xdr.c | 4 | ||||
-rw-r--r-- | fs/nfsd/nfs4callback.c | 276 | ||||
-rw-r--r-- | fs/nfsd/nfs4layouts.c | 7 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 41 | ||||
-rw-r--r-- | fs/nfsd/nfs4recover.c | 81 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 154 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 25 | ||||
-rw-r--r-- | fs/nfsd/nfsctl.c | 78 | ||||
-rw-r--r-- | fs/nfsd/nfsd.h | 34 | ||||
-rw-r--r-- | fs/nfsd/nfsfh.h | 7 | ||||
-rw-r--r-- | fs/nfsd/nfsproc.c | 53 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 8 | ||||
-rw-r--r-- | fs/nfsd/nfsxdr.c | 4 | ||||
-rw-r--r-- | fs/nfsd/state.h | 43 | ||||
-rw-r--r-- | fs/nfsd/stats.c | 4 | ||||
-rw-r--r-- | fs/nfsd/stats.h | 2 | ||||
-rw-r--r-- | fs/nfsd/trace.h | 326 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 247 | ||||
-rw-r--r-- | fs/nfsd/vfs.h | 10 | ||||
-rw-r--r-- | fs/nfsd/xdr4.h | 4 | ||||
-rw-r--r-- | fs/nfsd/xdr4cb.h | 5 |
28 files changed, 1137 insertions, 543 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index c0bd1509ccd4..879e0b104d1c 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -4,6 +4,7 @@ config NFSD depends on INET depends on FILE_LOCKING depends on FSNOTIFY + select CRC32 select LOCKD select SUNRPC select EXPORTFS @@ -76,8 +77,8 @@ config NFSD_V4 select FS_POSIX_ACL select RPCSEC_GSS_KRB5 select CRYPTO + select CRYPTO_LIB_SHA256 select CRYPTO_MD5 - select CRYPTO_SHA256 select GRACE_PERIOD select NFS_V4_2_SSC_HELPER if NFS_V4_2 help @@ -172,6 +173,16 @@ config NFSD_LEGACY_CLIENT_TRACKING recoverydir, or spawn a process directly using a usermodehelper upcall. - These legacy client tracking methods have proven to be probelmatic + These legacy client tracking methods have proven to be problematic and will be removed in the future. Say Y here if you need support for them in the interim. + +config NFSD_V4_DELEG_TIMESTAMPS + bool "Support delegated timestamps" + depends on NFSD_V4 + default n + help + NFSD implements delegated timestamps according to + draft-ietf-nfsv4-delstid-08 "Extending the Opening of Files". This + is currently an experimental feature and is therefore left disabled + by default. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 2f687619f65b..55744bb786c9 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -24,6 +24,7 @@ nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o nfsd-$(CONFIG_NFS_LOCALIO) += localio.o +nfsd-$(CONFIG_DEBUG_FS) += debugfs.o .PHONY: xdrgen diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c new file mode 100644 index 000000000000..84b0c8b559dc --- /dev/null +++ b/fs/nfsd/debugfs.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/debugfs.h> + +#include "nfsd.h" + +static struct dentry *nfsd_top_dir __read_mostly; + +/* + * /sys/kernel/debug/nfsd/disable-splice-read + * + * Contents: + * %0: NFS READ is allowed to use page splicing + * %1: NFS READ uses only iov iter read + * + * The default value of this setting is zero (page splicing is + * allowed). This setting takes immediate effect for all NFS + * versions, all exports, and in all NFSD net namespaces. + */ + +static int nfsd_dsr_get(void *data, u64 *val) +{ + *val = nfsd_disable_splice_read ? 1 : 0; + return 0; +} + +static int nfsd_dsr_set(void *data, u64 val) +{ + nfsd_disable_splice_read = (val > 0) ? true : false; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n"); + +void nfsd_debugfs_exit(void) +{ + debugfs_remove_recursive(nfsd_top_dir); + nfsd_top_dir = NULL; +} + +void nfsd_debugfs_init(void) +{ + nfsd_top_dir = debugfs_create_dir("nfsd", NULL); + + debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO, + nfsd_top_dir, NULL, &nfsd_dsr_fops); +} diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 0363720280d4..88ae410b4113 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1124,7 +1124,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) goto ok; } - goto denied; + if (!may_bypass_gss) + goto denied; ok: /* legacy gss-only clients are always OK: */ diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index fb9b1656a287..ab85e6a2454f 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -319,15 +319,14 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } - -static bool nfsd_file_lru_add(struct nfsd_file *nf) +static void nfsd_file_lru_add(struct nfsd_file *nf) { - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) { + refcount_inc(&nf->nf_ref); + if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) trace_nfsd_file_lru_add(nf); - return true; - } - return false; + else + WARN_ON(1); + nfsd_file_schedule_laundrette(); } static bool nfsd_file_lru_remove(struct nfsd_file *nf) @@ -363,30 +362,10 @@ nfsd_file_put(struct nfsd_file *nf) if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - /* - * If this is the last reference (nf_ref == 1), then try to - * transfer it to the LRU. - */ - if (refcount_dec_not_one(&nf->nf_ref)) - return; - - /* Try to add it to the LRU. If that fails, decrement. */ - if (nfsd_file_lru_add(nf)) { - /* If it's still hashed, we're done */ - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_schedule_laundrette(); - return; - } - - /* - * We're racing with unhashing, so try to remove it from - * the LRU. If removal fails, then someone else already - * has our reference. - */ - if (!nfsd_file_lru_remove(nf)) - return; - } + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + set_bit(NFSD_FILE_RECENT, &nf->nf_flags); } + if (refcount_dec_and_test(&nf->nf_ref)) nfsd_file_free(nf); } @@ -530,13 +509,12 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, } /* - * Put the reference held on behalf of the LRU. If it wasn't the last - * one, then just remove it from the LRU and ignore it. + * Put the reference held on behalf of the LRU if it is the last + * reference, else rotate. */ - if (!refcount_dec_and_test(&nf->nf_ref)) { + if (!refcount_dec_if_one(&nf->nf_ref)) { trace_nfsd_file_gc_in_use(nf); - list_lru_isolate(lru, &nf->nf_lru); - return LRU_REMOVED; + return LRU_ROTATE; } /* Refcount went to zero. Unhash it and queue it to the dispose list */ @@ -548,14 +526,54 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_REMOVED; } +static enum lru_status +nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru, + void *arg) +{ + struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); + + if (test_and_clear_bit(NFSD_FILE_RECENT, &nf->nf_flags)) { + /* + * "REFERENCED" really means "should be at the end of the + * LRU. As we are putting it there we can clear the flag. + */ + clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + trace_nfsd_file_gc_aged(nf); + return LRU_ROTATE; + } + return nfsd_file_lru_cb(item, lru, arg); +} + +/* If the shrinker runs between calls to list_lru_walk_node() in + * nfsd_file_gc(), the "remaining" count will be wrong. This could + * result in premature freeing of some files. This may not matter much + * but is easy to fix with this spinlock which temporarily disables + * the shrinker. + */ +static DEFINE_SPINLOCK(nfsd_gc_lock); static void nfsd_file_gc(void) { + unsigned long ret = 0; LIST_HEAD(dispose); - unsigned long ret; + int nid; + + spin_lock(&nfsd_gc_lock); + for_each_node_state(nid, N_NORMAL_MEMORY) { + unsigned long remaining = list_lru_count_node(&nfsd_file_lru, nid); + + while (remaining > 0) { + unsigned long nr = min(remaining, NFSD_FILE_GC_BATCH); - ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, - &dispose, list_lru_count(&nfsd_file_lru)); + remaining -= nr; + ret += list_lru_walk_node(&nfsd_file_lru, nid, nfsd_file_gc_cb, + &dispose, &nr); + if (nr) + /* walk aborted early */ + remaining = 0; + } + } + spin_unlock(&nfsd_gc_lock); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_dispose_list_delayed(&dispose); } @@ -563,9 +581,9 @@ nfsd_file_gc(void) static void nfsd_file_gc_worker(struct work_struct *work) { - nfsd_file_gc(); if (list_lru_count(&nfsd_file_lru)) - nfsd_file_schedule_laundrette(); + nfsd_file_gc(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -580,8 +598,12 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) LIST_HEAD(dispose); unsigned long ret; + if (!spin_trylock(&nfsd_gc_lock)) + return SHRINK_STOP; + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); + spin_unlock(&nfsd_gc_lock); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_dispose_list_delayed(&dispose); return ret; @@ -686,17 +708,12 @@ nfsd_file_close_inode(struct inode *inode) void nfsd_file_close_inode_sync(struct inode *inode) { - struct nfsd_file *nf; LIST_HEAD(dispose); trace_nfsd_file_close(inode); nfsd_file_queue_for_close(inode, &dispose); - while (!list_empty(&dispose)) { - nf = list_first_entry(&dispose, struct nfsd_file, nf_gc); - list_del_init(&nf->nf_gc); - nfsd_file_free(nf); - } + nfsd_file_dispose_list(&dispose); } static int @@ -1058,16 +1075,8 @@ retry: nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc); rcu_read_unlock(); - if (nf) { - /* - * If the nf is on the LRU then it holds an extra reference - * that must be put if it's removed. It had better not be - * the last one however, since we should hold another. - */ - if (nfsd_file_lru_remove(nf)) - refcount_dec(&nf->nf_ref); + if (nf) goto wait_for_construction; - } new = nfsd_file_alloc(net, inode, need, want_gc); if (!new) { @@ -1161,6 +1170,9 @@ open_file: */ if (status != nfs_ok || inode->i_nlink == 0) nfsd_file_unhash(nf); + else if (want_gc) + nfsd_file_lru_add(nf); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); if (status == nfs_ok) goto out; diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index d5db6b34ba30..5865f9c72712 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -4,6 +4,12 @@ #include <linux/fsnotify_backend.h> /* + * Limit the time that the list_lru_one lock is held during + * an LRU scan. + */ +#define NFSD_FILE_GC_BATCH (16UL) + +/* * This is the fsnotify_mark container that nfsd attaches to the files that it * is holding open. Note that we have a separate refcount here aside from the * one in the fsnotify_mark. We only want a single fsnotify_mark attached to @@ -38,6 +44,7 @@ struct nfsd_file { #define NFSD_FILE_PENDING (1) #define NFSD_FILE_REFERENCED (2) #define NFSD_FILE_GC (3) +#define NFSD_FILE_RECENT (4) unsigned long nf_flags; refcount_t nf_ref; unsigned char nf_may; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 372bdcf5e07a..a817d8485d21 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -14,6 +14,7 @@ #include "xdr3.h" #include "vfs.h" #include "filecache.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -69,8 +70,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: GETATTR(3) %s\n", - SVCFH_fmt(&argp->fh)); + trace_nfsd_vfs_getattr(rqstp, &argp->fh); fh_copy(&resp->fh, &argp->fh); resp->status = fh_verify(rqstp, &resp->fh, 0, @@ -220,7 +220,6 @@ nfsd3_proc_write(struct svc_rqst *rqstp) struct nfsd3_writeargs *argp = rqstp->rq_argp; struct nfsd3_writeres *resp = rqstp->rq_resp; unsigned long cnt = argp->len; - unsigned int nvecs; dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", SVCFH_fmt(&argp->fh), @@ -235,10 +234,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, &argp->payload); - resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, nvecs, &cnt, + &argp->payload, &cnt, resp->committed, resp->verf); resp->count = cnt; resp->status = nfsd3_map_status(resp->status); @@ -266,6 +263,8 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 status; int host_err; + trace_nfsd_vfs_create(rqstp, fhp, S_IFREG, argp->name, argp->len); + if (isdotent(argp->name, argp->len)) return nfserr_exist; if (!(iap->ia_valid & ATTR_MODE)) @@ -284,7 +283,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, inode_lock_nested(inode, I_MUTEX_PARENT); - child = lookup_one_len(argp->name, parent, argp->len); + child = lookup_one(&nop_mnt_idmap, + &QSTR_LEN(argp->name, argp->len), + parent); if (IS_ERR(child)) { status = nfserrno(PTR_ERR(child)); goto out; @@ -380,11 +381,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp) struct nfsd3_diropres *resp = rqstp->rq_resp; svc_fh *dirfhp, *newfhp; - dprintk("nfsd: CREATE(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - dirfhp = fh_copy(&resp->dirfh, &argp->fh); newfhp = fh_init(&resp->fh, NFS3_FHSIZE); @@ -405,11 +401,6 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) .na_iattr = &argp->attrs, }; - dprintk("nfsd: MKDIR(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - argp->attrs.ia_valid &= ~ATTR_SIZE; fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); @@ -445,11 +436,6 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) goto out; } - dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", - SVCFH_fmt(&argp->ffh), - argp->flen, argp->fname, - argp->tlen, argp->tname); - fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, @@ -474,11 +460,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) int type; dev_t rdev = 0; - dprintk("nfsd: MKNOD(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); @@ -511,11 +492,6 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: REMOVE(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - /* Unlink. -S_IFDIR means file must not be a directory */ fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, @@ -533,11 +509,6 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: RMDIR(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); @@ -551,15 +522,6 @@ nfsd3_proc_rename(struct svc_rqst *rqstp) struct nfsd3_renameargs *argp = rqstp->rq_argp; struct nfsd3_renameres *resp = rqstp->rq_resp; - dprintk("nfsd: RENAME(3) %s %.*s ->\n", - SVCFH_fmt(&argp->ffh), - argp->flen, - argp->fname); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - fh_copy(&resp->ffh, &argp->ffh); fh_copy(&resp->tfh, &argp->tfh); resp->status = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen, @@ -574,13 +536,6 @@ nfsd3_proc_link(struct svc_rqst *rqstp) struct nfsd3_linkargs *argp = rqstp->rq_argp; struct nfsd3_linkres *resp = rqstp->rq_resp; - dprintk("nfsd: LINK(3) %s ->\n", - SVCFH_fmt(&argp->ffh)); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - fh_copy(&resp->fh, &argp->ffh); fh_copy(&resp->tfh, &argp->tfh); resp->status = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen, @@ -619,9 +574,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) struct nfsd3_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, (u32) argp->cookie); + trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd3_init_dirlist_pages(rqstp, resp, argp->count); @@ -653,9 +606,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) struct nfsd3_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, (u32) argp->cookie); + trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd3_init_dirlist_pages(rqstp, resp, argp->count); @@ -696,9 +647,6 @@ nfsd3_proc_fsstat(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_fsstatres *resp = rqstp->rq_resp; - dprintk("nfsd: FSSTAT(3) %s\n", - SVCFH_fmt(&argp->fh)); - resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); fh_put(&argp->fh); resp->status = nfsd3_map_status(resp->status); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index a7a07470c1f8..ef4971d71ac4 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1001,7 +1001,9 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, } else dchild = dget(dparent); } else - dchild = lookup_positive_unlocked(name, dparent, namlen); + dchild = lookup_one_positive_unlocked(&nop_mnt_idmap, + &QSTR_LEN(name, namlen), + dparent); if (IS_ERR(dchild)) return rv; if (d_mountpoint(dchild)) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 484077200c5d..ccb00aa93be0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -46,8 +46,6 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC -static void nfsd4_mark_cb_fault(struct nfs4_client *clp); - #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 @@ -101,15 +99,15 @@ static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap, if (bitmap[0] & FATTR4_WORD0_CHANGE) if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0) - return -NFSERR_BAD_XDR; + return -EIO; if (bitmap[0] & FATTR4_WORD0_SIZE) if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0) - return -NFSERR_BAD_XDR; + return -EIO; if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) { fattr4_time_deleg_access access; if (!xdrgen_decode_fattr4_time_deleg_access(xdr, &access)) - return -NFSERR_BAD_XDR; + return -EIO; fattr->ncf_cb_atime.tv_sec = access.seconds; fattr->ncf_cb_atime.tv_nsec = access.nseconds; @@ -118,7 +116,7 @@ static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap, fattr4_time_deleg_modify modify; if (!xdrgen_decode_fattr4_time_deleg_modify(xdr, &modify)) - return -NFSERR_BAD_XDR; + return -EIO; fattr->ncf_cb_mtime.tv_sec = modify.seconds; fattr->ncf_cb_mtime.tv_nsec = modify.nseconds; @@ -419,6 +417,29 @@ static u32 highest_slotid(struct nfsd4_session *ses) return idx; } +static void +encode_referring_call4(struct xdr_stream *xdr, + const struct nfsd4_referring_call *rc) +{ + encode_uint32(xdr, rc->rc_sequenceid); + encode_uint32(xdr, rc->rc_slotid); +} + +static void +encode_referring_call_list4(struct xdr_stream *xdr, + const struct nfsd4_referring_call_list *rcl) +{ + struct nfsd4_referring_call *rc; + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN); + xdr_encode_opaque_fixed(p, rcl->rcl_sessionid.data, + NFS4_MAX_SESSIONID_LEN); + encode_uint32(xdr, rcl->__nr_referring_calls); + list_for_each_entry(rc, &rcl->rcl_referring_calls, __list) + encode_referring_call4(xdr, rc); +} + /* * CB_SEQUENCE4args * @@ -436,6 +457,7 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) { struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + struct nfsd4_referring_call_list *rcl; __be32 *p; if (hdr->minorversion == 0) @@ -444,12 +466,16 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr, encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE); encode_sessionid4(xdr, session); - p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4); + p = xdr_reserve_space(xdr, XDR_UNIT * 4); *p++ = cpu_to_be32(session->se_cb_seq_nr[cb->cb_held_slot]); /* csa_sequenceid */ *p++ = cpu_to_be32(cb->cb_held_slot); /* csa_slotid */ *p++ = cpu_to_be32(highest_slotid(session)); /* csa_highest_slotid */ *p++ = xdr_zero; /* csa_cachethis */ - xdr_encode_empty_array(p); /* csa_referring_call_lists */ + + /* csa_referring_call_lists */ + encode_uint32(xdr, cb->cb_nr_referring_call_list); + list_for_each_entry(rcl, &cb->cb_referring_call_list, __list) + encode_referring_call_list4(xdr, rcl); hdr->nops++; } @@ -682,15 +708,15 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, if (unlikely(status || cb->cb_status)) return status; if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0) - return -NFSERR_BAD_XDR; + return -EIO; if (xdr_stream_decode_u32(xdr, &attrlen) < 0) - return -NFSERR_BAD_XDR; + return -EIO; maxlen = sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize); if (bitmap[2] != 0) maxlen += (sizeof(ncf->ncf_cb_mtime.tv_sec) + sizeof(ncf->ncf_cb_mtime.tv_nsec)) * 2; if (attrlen > maxlen) - return -NFSERR_BAD_XDR; + return -EIO; status = decode_cb_fattr4(xdr, bitmap, ncf); return status; } @@ -1064,6 +1090,17 @@ static bool nfsd4_queue_cb(struct nfsd4_callback *cb) return queue_work(clp->cl_callback_wq, &cb->cb_work); } +static void nfsd4_requeue_cb(struct rpc_task *task, struct nfsd4_callback *cb) +{ + struct nfs4_client *clp = cb->cb_clp; + + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { + trace_nfsd_cb_restart(clp, cb); + task->tk_status = 0; + set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags); + } +} + static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) { atomic_inc(&clp->cl_cb_inflight); @@ -1301,15 +1338,112 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb) trace_nfsd_cb_destroy(clp, cb); nfsd41_cb_release_slot(cb); + if (test_bit(NFSD4_CALLBACK_WAKE, &cb->cb_flags)) + clear_and_wake_up_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags); + else + clear_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags); + if (cb->cb_ops && cb->cb_ops->release) cb->cb_ops->release(cb); nfsd41_cb_inflight_end(clp); } -/* - * TODO: cb_sequence should support referring call lists, cachethis, - * and mark callback channel down on communication errors. +/** + * nfsd41_cb_referring_call - add a referring call to a callback operation + * @cb: context of callback to add the rc to + * @sessionid: referring call's session ID + * @slotid: referring call's session slot index + * @seqno: referring call's slot sequence number + * + * Caller serializes access to @cb. + * + * NB: If memory allocation fails, the referring call is not added. + */ +void nfsd41_cb_referring_call(struct nfsd4_callback *cb, + struct nfs4_sessionid *sessionid, + u32 slotid, u32 seqno) +{ + struct nfsd4_referring_call_list *rcl; + struct nfsd4_referring_call *rc; + bool found; + + might_sleep(); + + found = false; + list_for_each_entry(rcl, &cb->cb_referring_call_list, __list) { + if (!memcmp(rcl->rcl_sessionid.data, sessionid->data, + NFS4_MAX_SESSIONID_LEN)) { + found = true; + break; + } + } + if (!found) { + rcl = kmalloc(sizeof(*rcl), GFP_KERNEL); + if (!rcl) + return; + memcpy(rcl->rcl_sessionid.data, sessionid->data, + NFS4_MAX_SESSIONID_LEN); + rcl->__nr_referring_calls = 0; + INIT_LIST_HEAD(&rcl->rcl_referring_calls); + list_add(&rcl->__list, &cb->cb_referring_call_list); + cb->cb_nr_referring_call_list++; + } + + found = false; + list_for_each_entry(rc, &rcl->rcl_referring_calls, __list) { + if (rc->rc_sequenceid == seqno && rc->rc_slotid == slotid) { + found = true; + break; + } + } + if (!found) { + rc = kmalloc(sizeof(*rc), GFP_KERNEL); + if (!rc) + goto out; + rc->rc_sequenceid = seqno; + rc->rc_slotid = slotid; + rcl->__nr_referring_calls++; + list_add(&rc->__list, &rcl->rcl_referring_calls); + } + +out: + if (!rcl->__nr_referring_calls) { + cb->cb_nr_referring_call_list--; + kfree(rcl); + } +} + +/** + * nfsd41_cb_destroy_referring_call_list - release referring call info + * @cb: context of a callback that has completed + * + * Callers who allocate referring calls using nfsd41_cb_referring_call() must + * release those resources by calling nfsd41_cb_destroy_referring_call_list. + * + * Caller serializes access to @cb. */ +void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb) +{ + struct nfsd4_referring_call_list *rcl; + struct nfsd4_referring_call *rc; + + while (!list_empty(&cb->cb_referring_call_list)) { + rcl = list_first_entry(&cb->cb_referring_call_list, + struct nfsd4_referring_call_list, + __list); + + while (!list_empty(&rcl->rcl_referring_calls)) { + rc = list_first_entry(&rcl->rcl_referring_calls, + struct nfsd4_referring_call, + __list); + list_del(&rc->__list); + kfree(rc); + } + list_del(&rcl->__list); + kfree(rcl); + } +} + static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; @@ -1328,30 +1462,14 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) rpc_call_start(task); } +/* Returns true if CB_COMPOUND processing should continue */ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb) { - struct nfs4_client *clp = cb->cb_clp; - struct nfsd4_session *session = clp->cl_cb_session; - bool ret = true; - - if (!clp->cl_minorversion) { - /* - * If the backchannel connection was shut down while this - * task was queued, we need to resubmit it after setting up - * a new backchannel connection. - * - * Note that if we lost our callback connection permanently - * the submission code will error out, so we don't need to - * handle that case here. - */ - if (RPC_SIGNALLED(task)) - goto need_restart; - - return true; - } + struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + bool ret = false; if (cb->cb_held_slot < 0) - goto need_restart; + goto requeue; /* This is the operation status code for CB_SEQUENCE */ trace_nfsd_cb_seq_status(task, cb); @@ -1365,11 +1483,16 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback * (sequence ID, cached reply) MUST NOT change. */ ++session->se_cb_seq_nr[cb->cb_held_slot]; + ret = true; break; case -ESERVERFAULT: - ++session->se_cb_seq_nr[cb->cb_held_slot]; + /* + * Call succeeded, but the session, slot index, or slot + * sequence number in the response do not match the same + * in the server's call. The sequence information is thus + * untrustworthy. + */ nfsd4_mark_cb_fault(cb->cb_clp); - ret = false; break; case 1: /* @@ -1381,43 +1504,42 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback fallthrough; case -NFS4ERR_BADSESSION: nfsd4_mark_cb_fault(cb->cb_clp); - ret = false; - goto need_restart; + goto requeue; case -NFS4ERR_DELAY: cb->cb_seq_status = 1; - if (!rpc_restart_call(task)) - goto out; - + if (RPC_SIGNALLED(task) || !rpc_restart_call(task)) + goto requeue; rpc_delay(task, 2 * HZ); return false; + case -NFS4ERR_SEQ_MISORDERED: case -NFS4ERR_BADSLOT: + /* + * A SEQ_MISORDERED or BADSLOT error means that the client and + * server are out of sync as to the backchannel parameters. Mark + * the backchannel faulty and restart the RPC, but leak the slot + * so that it's no longer used. + */ + nfsd4_mark_cb_fault(cb->cb_clp); + cb->cb_held_slot = -1; goto retry_nowait; - case -NFS4ERR_SEQ_MISORDERED: - if (session->se_cb_seq_nr[cb->cb_held_slot] != 1) { - session->se_cb_seq_nr[cb->cb_held_slot] = 1; - goto retry_nowait; - } - break; default: nfsd4_mark_cb_fault(cb->cb_clp); } trace_nfsd_cb_free_slot(task, cb); nfsd41_cb_release_slot(cb); - - if (RPC_SIGNALLED(task)) - goto need_restart; -out: return ret; retry_nowait: - if (rpc_restart_call_prepare(task)) - ret = false; - goto out; -need_restart: - if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { - trace_nfsd_cb_restart(clp, cb); - task->tk_status = 0; - cb->cb_need_restart = true; + /* + * RPC_SIGNALLED() means that the rpc_client is being torn down and + * (possibly) recreated. Requeue the call in that case. + */ + if (!RPC_SIGNALLED(task)) { + if (rpc_restart_call_prepare(task)) + return false; } +requeue: + nfsd41_cb_release_slot(cb); + nfsd4_requeue_cb(task, cb); return false; } @@ -1428,8 +1550,21 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) trace_nfsd_cb_rpc_done(clp); - if (!nfsd4_cb_sequence_done(task, cb)) + if (!clp->cl_minorversion) { + /* + * If the backchannel connection was shut down while this + * task was queued, we need to resubmit it after setting up + * a new backchannel connection. + * + * Note that if we lost our callback connection permanently + * the submission code will error out, so we don't need to + * handle that case here. + */ + if (RPC_SIGNALLED(task)) + nfsd4_requeue_cb(task, cb); + } else if (!nfsd4_cb_sequence_done(task, cb)) { return; + } if (cb->cb_status) { WARN_ONCE(task->tk_status, @@ -1462,7 +1597,7 @@ static void nfsd4_cb_release(void *calldata) trace_nfsd_cb_rpc_release(cb->cb_clp); - if (cb->cb_need_restart) + if (test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) nfsd4_queue_cb(cb); else nfsd41_destroy_cb(cb); @@ -1575,7 +1710,7 @@ nfsd4_run_cb_work(struct work_struct *work) container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; - int flags; + int flags, ret; trace_nfsd_cb_start(clp); @@ -1601,16 +1736,19 @@ nfsd4_run_cb_work(struct work_struct *work) return; } - if (cb->cb_need_restart) { - cb->cb_need_restart = false; - } else { + if (!test_and_clear_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) { if (cb->cb_ops && cb->cb_ops->prepare) cb->cb_ops->prepare(cb); } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; - rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, - cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); + ret = rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, + cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); + if (ret != 0) { + set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags); + nfsd4_queue_cb(cb); + } } void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, @@ -1620,11 +1758,13 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; + cb->cb_flags = 0; cb->cb_ops = ops; INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); cb->cb_status = 0; - cb->cb_need_restart = false; cb->cb_held_slot = -1; + cb->cb_nr_referring_call_list = 0; + INIT_LIST_HEAD(&cb->cb_referring_call_list); } /** diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index fbfddd3c4c94..290271ac4245 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -344,9 +344,10 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid); - refcount_inc(&ls->ls_stid.sc_count); - nfsd4_run_cb(&ls->ls_recall); - + if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ls->ls_recall.cb_flags)) { + refcount_inc(&ls->ls_stid.sc_count); + nfsd4_run_cb(&ls->ls_recall); + } out_unlock: spin_unlock(&ls->ls_lock); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f6e06c779d09..f13abbb13b38 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -266,7 +266,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, inode_lock_nested(inode, I_MUTEX_PARENT); - child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); + child = lookup_one(&nop_mnt_idmap, + &QSTR_LEN(open->op_fname, open->op_fnamelen), + parent); if (IS_ERR(child)) { status = nfserrno(PTR_ERR(child)); goto out; @@ -876,6 +878,8 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_getattr *getattr = &u->getattr; __be32 status; + trace_nfsd_vfs_getattr(rqstp, &cstate->current_fh); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) return status; @@ -998,6 +1002,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, u64 cookie = readdir->rd_cookie; static const nfs4_verifier zeroverf; + trace_nfsd_vfs_readdir(rqstp, &cstate->current_fh, + readdir->rd_maxcount, readdir->rd_cookie); + /* no need to check permission - this will be done in nfsd_readdir() */ if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) @@ -1211,7 +1218,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd_file *nf = NULL; __be32 status = nfs_ok; unsigned long cnt; - int nvecs; if (write->wr_offset > (u64)OFFSET_MAX || write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) @@ -1226,13 +1232,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; write->wr_how_written = write->wr_stable_how; - - nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); - WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); - status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, - write->wr_offset, rqstp->rq_vec, nvecs, &cnt, - write->wr_how_written, + write->wr_offset, &write->wr_payload, + &cnt, write->wr_how_written, (__be32 *)write->wr_verifier.data); nfsd_file_put(nf); @@ -1379,8 +1381,11 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) static void nfsd4_stop_copy(struct nfsd4_copy *copy) { trace_nfsd_copy_async_cancel(copy); - if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) + if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) { kthread_stop(copy->copy_task); + copy->nfserr = nfs_ok; + set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); + } nfs4_put_copy(copy); } @@ -1709,10 +1714,11 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, switch (task->tk_status) { case -NFS4ERR_DELAY: if (cbo->co_retries--) { - rpc_delay(task, 1 * HZ); + rpc_delay(task, HZ / 5); return 0; } } + nfsd41_cb_destroy_referring_call_list(cb); return 1; } @@ -1845,9 +1851,12 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); + nfsd41_cb_referring_call(&cbo->co_cb, &cbo->co_referring_sessionid, + cbo->co_referring_slotid, + cbo->co_referring_seqno); trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, &cbo->co_fh, copy->cp_count, copy->nfserr); - nfsd4_run_cb(&cbo->co_cb); + nfsd4_try_run_cb(&cbo->co_cb); } /** @@ -1961,6 +1970,11 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, memcpy(&result->cb_stateid, ©->cp_stateid.cs_stid, sizeof(result->cb_stateid)); dup_copy_fields(copy, async_copy); + memcpy(async_copy->cp_cb_offload.co_referring_sessionid.data, + cstate->session->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN); + async_copy->cp_cb_offload.co_referring_slotid = cstate->slot->sl_index; + async_copy->cp_cb_offload.co_referring_seqno = cstate->slot->sl_seqid; async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) @@ -3766,7 +3780,8 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; u32 opiter; - if (!cstate->minorversion) + if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] || + cstate->minorversion == 0) return false; if (cstate->spo_must_allowed) @@ -3832,7 +3847,7 @@ static const struct svc_procedure nfsd_procedures4[2] = { .pc_ressize = sizeof(struct nfsd4_compoundres), .pc_release = nfsd4_release_compoundargs, .pc_cachetype = RC_NOCACHE, - .pc_xdrressize = NFSD_BUFSIZE/4, + .pc_xdrressize = 3+NFSSVC_MAXBLKSIZE/4, .pc_name = "COMPOUND", }, }; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 28f4d5311c40..82785db730d9 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -33,6 +33,7 @@ */ #include <crypto/hash.h> +#include <crypto/sha2.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/namei.h> @@ -218,7 +219,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) /* lock the parent */ inode_lock(d_inode(dir)); - dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1); + dentry = lookup_one(&nop_mnt_idmap, &QSTR(dname), dir); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); goto out_unlock; @@ -233,9 +234,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + if (IS_ERR(dentry)) + status = PTR_ERR(dentry); out_put: - dput(dentry); + if (!status) + dput(dentry); out_unlock: inode_unlock(d_inode(dir)); if (status == 0) { @@ -313,7 +317,8 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) list_for_each_entry_safe(entry, tmp, &ctx.names, list) { if (!status) { struct dentry *dentry; - dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); + dentry = lookup_one(&nop_mnt_idmap, + &QSTR(entry->name), dir); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); break; @@ -336,16 +341,16 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) } static int -nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) +nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn) { struct dentry *dir, *dentry; int status; - dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); + dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name); dir = nn->rec_file->f_path.dentry; inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); - dentry = lookup_one_len(name, dir, namlen); + dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); goto out_unlock; @@ -405,7 +410,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (status < 0) goto out_drop_write; - status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn); + status = nfsd4_unlink_clid_dir(dname, nn); nfs4_reset_creds(original_cred); if (status == 0) { vfs_fsync(nn->rec_file, 0); @@ -733,7 +738,6 @@ struct cld_net { spinlock_t cn_lock; struct list_head cn_list; unsigned int cn_xid; - struct crypto_shash *cn_tfm; #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING bool cn_has_legacy; #endif @@ -1059,8 +1063,6 @@ nfsd4_remove_cld_pipe(struct net *net) nfsd4_cld_unregister_net(net, cn->cn_pipe); rpc_destroy_pipe_data(cn->cn_pipe); - if (cn->cn_tfm) - crypto_free_shash(cn->cn_tfm); kfree(nn->cld_net); nn->cld_net = NULL; } @@ -1154,8 +1156,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; struct cld_msg_v2 *cmsg; - struct crypto_shash *tfm = cn->cn_tfm; - struct xdr_netobj cksum; char *principal = NULL; /* Don't upcall if it's already stored */ @@ -1178,22 +1178,9 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal) { - cksum.len = crypto_shash_digestsize(tfm); - cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) { - ret = -ENOMEM; - goto out; - } - ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal), - cksum.data); - if (ret) { - kfree(cksum.data); - goto out; - } - cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len; - memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data, - cksum.data, cksum.len); - kfree(cksum.data); + sha256(principal, strlen(principal), + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data); + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = SHA256_DIGEST_SIZE; } else cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0; @@ -1203,7 +1190,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); } -out: free_cld_upcall(cup); out_err: if (ret) @@ -1342,12 +1328,11 @@ found: static int nfsd4_cld_check_v2(struct nfs4_client *clp) { - struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING struct cld_net *cn = nn->cld_net; - int status; - struct crypto_shash *tfm = cn->cn_tfm; - struct xdr_netobj cksum; +#endif + struct nfs4_client_reclaim *crp; char *principal = NULL; /* did we already find that this client is stable? */ @@ -1363,6 +1348,7 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) if (cn->cn_has_legacy) { struct xdr_netobj name; char dname[HEXDIR_LEN]; + int status; status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) @@ -1385,28 +1371,18 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) return -ENOENT; found: if (crp->cr_princhash.len) { + u8 digest[SHA256_DIGEST_SIZE]; + if (clp->cl_cred.cr_raw_principal) principal = clp->cl_cred.cr_raw_principal; else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal == NULL) return -ENOENT; - cksum.len = crypto_shash_digestsize(tfm); - cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) - return -ENOENT; - status = crypto_shash_tfm_digest(tfm, principal, - strlen(principal), cksum.data); - if (status) { - kfree(cksum.data); - return -ENOENT; - } - if (memcmp(crp->cr_princhash.data, cksum.data, - crp->cr_princhash.len)) { - kfree(cksum.data); + sha256(principal, strlen(principal), digest); + if (memcmp(crp->cr_princhash.data, digest, + crp->cr_princhash.len)) return -ENOENT; - } - kfree(cksum.data); } crp->cr_clp = clp; return 0; @@ -1586,7 +1562,6 @@ nfsd4_cld_tracking_init(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); bool running; int retries = 10; - struct crypto_shash *tfm; status = nfs4_cld_state_init(net); if (status) @@ -1611,12 +1586,6 @@ nfsd4_cld_tracking_init(struct net *net) status = -ETIMEDOUT; goto err_remove; } - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - status = PTR_ERR(tfm); - goto err_remove; - } - nn->cld_net->cn_tfm = tfm; status = nfsd4_cld_get_version(nn); if (status == -EOPNOTSUPP) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 153eeea2c7c9..d5694987f86f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -946,15 +946,6 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla spin_lock_init(&stid->sc_lock); INIT_LIST_HEAD(&stid->sc_cp_list); - /* - * It shouldn't be a problem to reuse an opaque stateid value. - * I don't think it is for 4.1. But with 4.0 I worry that, for - * example, a stray write retransmission could be accepted by - * the server when it should have been rejected. Therefore, - * adopt a trick from the sctp code to attempt to maximize the - * amount of time until an id is reused, by ensuring they always - * "increase" (mod INT_MAX): - */ return stid; out_free: kmem_cache_free(slab, stid); @@ -1050,6 +1041,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) return openlockstateid(stid); } +/* + * As the sc_free callback of deleg, this may be called by nfs4_put_stid + * in nfsd_break_one_deleg. + * Considering nfsd_break_one_deleg is called with the flc->flc_lock held, + * this function mustn't ever sleep. + */ static void nfs4_free_deleg(struct nfs4_stid *stid) { struct nfs4_delegation *dp = delegstateid(stid); @@ -1378,7 +1375,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) struct nfs4_client *clp = dp->dl_stid.sc_client; WARN_ON(!list_empty(&dp->dl_recall_lru)); - WARN_ON_ONCE(!(dp->dl_stid.sc_status & + WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 && + !(dp->dl_stid.sc_status & (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED))); trace_nfsd_stid_revoke(&dp->dl_stid); @@ -1989,26 +1987,30 @@ reduce_session_slots(struct nfsd4_session *ses, int dec) return ret; } -/* - * We don't actually need to cache the rpc and session headers, so we - * can allocate a little less for each slot: - */ -static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) +static struct nfsd4_slot *nfsd4_alloc_slot(struct nfsd4_channel_attrs *fattrs, + int index, gfp_t gfp) { - u32 size; + struct nfsd4_slot *slot; + size_t size; - if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ) - size = 0; - else - size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; - return size + sizeof(struct nfsd4_slot); + /* + * The RPC and NFS session headers are never saved in + * the slot reply cache buffer. + */ + size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ? + 0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; + + slot = kzalloc(struct_size(slot, sl_data, size), gfp); + if (!slot) + return NULL; + slot->sl_index = index; + return slot; } static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, struct nfsd4_channel_attrs *battrs) { int numslots = fattrs->maxreqs; - int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; struct nfsd4_slot *slot; int i; @@ -2017,14 +2019,14 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, if (!new) return NULL; xa_init(&new->se_slots); - /* allocate each struct nfsd4_slot and data cache in one piece */ - slot = kzalloc(slotsize, GFP_KERNEL); + + slot = nfsd4_alloc_slot(fattrs, 0, GFP_KERNEL); if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL))) goto out_free; for (i = 1; i < numslots; i++) { const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; - slot = kzalloc(slotsize, gfp); + slot = nfsd4_alloc_slot(fattrs, i, gfp); if (!slot) break; if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) { @@ -3168,7 +3170,6 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); drop_client(clp); } @@ -3199,7 +3200,6 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb) struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - clear_and_wake_up_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags); nfs4_put_stid(&dp->dl_stid); } @@ -3220,11 +3220,15 @@ static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags)) + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags)) return; + /* set to proper status when nfsd4_cb_getattr_done runs */ ncf->ncf_cb_status = NFS4ERR_IO; + /* ensure that wake_bit is done when RUNNING is cleared */ + set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags); + refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&ncf->ncf_getattr); } @@ -4402,7 +4406,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfserr_rep_too_big; if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) goto out_put_session; - svc_reserve(rqstp, buflen); + svc_reserve_auth(rqstp, buflen); status = nfs_ok; /* Success! accept new slot seqid */ @@ -4438,8 +4442,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * spinlock, and only succeeds if there is * plenty of memory. */ - slot = kzalloc(slot_bytes(&session->se_fchannel), - GFP_NOWAIT); + slot = nfsd4_alloc_slot(&session->se_fchannel, s, + GFP_NOWAIT); prev_slot = xa_load(&session->se_slots, s); if (xa_is_value(prev_slot) && slot) { slot->sl_seqid = xa_to_value(prev_slot); @@ -4815,8 +4819,8 @@ out: static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - int count; struct nfsd_net *nn = shrink->private_data; + long count; count = atomic_read(&nn->nfsd_courtesy_clients); if (!count) @@ -5414,6 +5418,11 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = { static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { + bool queued; + + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags)) + return; + /* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease @@ -5422,7 +5431,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); + queued = nfsd4_run_cb(&dp->dl_recall); + WARN_ON_ONCE(!queued); + if (!queued) + refcount_dec(&dp->dl_stid.sc_count); } /* Called from break_lease() with flc_lock held. */ @@ -5948,11 +5960,23 @@ nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf) return 0; } +#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS +static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) +{ + return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS; +} +#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */ +static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) +{ + return false; +} +#endif /* CONFIG NFSD_V4_DELEG_TIMESTAMPS */ + static struct nfs4_delegation * nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *parent) { - bool deleg_ts = open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS; + bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct nfs4_file *fp = stp->st_stid.sc_file; struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; @@ -5999,6 +6023,15 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (!nf) return ERR_PTR(-EAGAIN); + /* + * File delegations and associated locks cannot be recovered if the + * export is from an NFS proxy server. + */ + if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { + nfsd_file_put(nf); + return ERR_PTR(-EOPNOTSUPP); + } + spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) @@ -6151,8 +6184,8 @@ static void nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *currentfh) { - bool deleg_ts = open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS; struct nfs4_openowner *oo = openowner(stp->st_stateowner); + bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct svc_fh *parent = NULL; struct nfs4_delegation *dp; @@ -6855,38 +6888,34 @@ deleg_reaper(struct nfsd_net *nn) { struct list_head *pos, *next; struct nfs4_client *clp; - LIST_HEAD(cblist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_state != NFSD4_ACTIVE || - list_empty(&clp->cl_delegations) || - atomic_read(&clp->cl_delegs_in_recall) || - test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || - (ktime_get_boottime_seconds() - - clp->cl_ra_time < 5)) { + + if (clp->cl_state != NFSD4_ACTIVE) + continue; + if (list_empty(&clp->cl_delegations)) + continue; + if (atomic_read(&clp->cl_delegs_in_recall)) + continue; + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags)) + continue; + if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5) + continue; + if (clp->cl_cb_state != NFSD4_CB_UP) continue; - } - list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ kref_get(&clp->cl_nfsdfs.cl_ref); - set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); clp->cl_ra_time = ktime_get_boottime_seconds(); - } - spin_unlock(&nn->client_lock); - - while (!list_empty(&cblist)) { - clp = list_first_entry(&cblist, struct nfs4_client, - cl_ra_cblist); - list_del_init(&clp->cl_ra_cblist); clp->cl_ra->ra_keep = 0; clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) | BIT(RCA4_TYPE_MASK_WDATA_DLG); trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } + spin_unlock(&nn->client_lock); } static void @@ -7051,7 +7080,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, */ statusmask |= SC_STATUS_REVOKED; - statusmask |= SC_STATUS_ADMIN_REVOKED; + statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) @@ -7706,9 +7735,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, - SC_STATUS_REVOKED | SC_STATUS_FREEABLE, - &s, nn); + status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn); if (status) goto out; dp = delegstateid(s); @@ -7816,7 +7843,7 @@ nfsd4_lm_notify(struct file_lock *fl) if (queue) { trace_nfsd_cb_notify_lock(lo, nbl); - nfsd4_run_cb(&nbl->nbl_cb); + nfsd4_try_run_cb(&nbl->nbl_cb); } } @@ -8134,6 +8161,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); if (status != nfs_ok) return status; + if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) { + status = nfserr_notsupp; + goto out; + } if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) @@ -8473,6 +8504,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_lock_range; goto put_stateid; } + if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { + status = nfserr_notsupp; + goto put_file; + } + file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); @@ -9182,8 +9218,8 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, nfs4_cb_getattr(&dp->dl_cb_fattr); spin_unlock(&ctx->flc_lock); - wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, - TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); + wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING, + TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); if (ncf->ncf_cb_status) { /* Recall delegation only if client didn't respond */ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e67420729ecd..3afcdbed6e14 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2564,7 +2564,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; - svc_reserve(argp->rqstp, max_reply + readbytes); + svc_reserve_auth(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; argp->splice_ok = nfsd_read_splice_ok(argp->rqstp); @@ -3391,6 +3391,23 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr, return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]); } +/* + * Copied from generic_remap_checks/generic_remap_file_range_prep. + * + * These generic functions use the file system's s_blocksize, but + * individual file systems aren't required to use + * generic_remap_file_range_prep. Until there is a mechanism for + * determining a particular file system's (or file's) clone block + * size, this is the best NFSD can do. + */ +static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct inode *inode = d_inode(args->dentry); + + return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize); +} + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) @@ -3545,7 +3562,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop, [FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat, [FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop, - [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4__noop, + [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize, [FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop, [FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop, @@ -3812,7 +3829,9 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name, __be32 nfserr; int ignore_crossmnt = 0; - dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen); + dentry = lookup_one_positive_unlocked(&nop_mnt_idmap, + &QSTR_LEN(name, namlen), + cd->rd_fhp->fh_dentry); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index ce2a71e4904c..3f3e9f6c4250 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1917,6 +1917,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) struct svc_serv *serv; LIST_HEAD(permsocks); struct nfsd_net *nn; + bool delete = false; int err, rem; mutex_lock(&nfsd_mutex); @@ -1977,34 +1978,28 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) } } - /* For now, no removing old sockets while server is running */ - if (serv->sv_nrthreads && !list_empty(&permsocks)) { + /* + * If there are listener transports remaining on the permsocks list, + * it means we were asked to remove a listener. + */ + if (!list_empty(&permsocks)) { list_splice_init(&permsocks, &serv->sv_permsocks); - spin_unlock_bh(&serv->sv_lock); - err = -EBUSY; - goto out_unlock_mtx; + delete = true; } + spin_unlock_bh(&serv->sv_lock); - /* Close the remaining sockets on the permsocks list */ - while (!list_empty(&permsocks)) { - xprt = list_first_entry(&permsocks, struct svc_xprt, xpt_list); - list_move(&xprt->xpt_list, &serv->sv_permsocks); - - /* - * Newly-created sockets are born with the BUSY bit set. Clear - * it if there are no threads, since nothing can pick it up - * in that case. - */ - if (!serv->sv_nrthreads) - clear_bit(XPT_BUSY, &xprt->xpt_flags); - - set_bit(XPT_CLOSE, &xprt->xpt_flags); - spin_unlock_bh(&serv->sv_lock); - svc_xprt_close(xprt); - spin_lock_bh(&serv->sv_lock); + /* Do not remove listeners while there are active threads. */ + if (serv->sv_nrthreads) { + err = -EBUSY; + goto out_unlock_mtx; } - spin_unlock_bh(&serv->sv_lock); + /* + * Since we can't delete an arbitrary llist entry, destroy the + * remaining listeners and recreate the list. + */ + if (delete) + svc_xprt_destroy_all(serv, net); /* walk list of addrs again, open any that still don't exist */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { @@ -2031,6 +2026,9 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) xprt = svc_find_listener(serv, xcl_name, net, sa); if (xprt) { + if (delete) + WARN_ONCE(1, "Transport type=%s already exists\n", + xcl_name); svc_xprt_put(xprt); continue; } @@ -2204,8 +2202,14 @@ static __net_init int nfsd_net_init(struct net *net) NFSD_STATS_COUNTERS_NUM); if (retval) goto out_repcache_error; + memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats)); nn->nfsd_svcstats.program = &nfsd_programs[0]; + if (!nfsd_proc_stat_init(net)) { + retval = -ENOMEM; + goto out_proc_error; + } + for (i = 0; i < sizeof(nn->nfsd_versions); i++) nn->nfsd_versions[i] = nfsd_support_version(i); for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++) @@ -2215,13 +2219,14 @@ static __net_init int nfsd_net_init(struct net *net) nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); - nfsd_proc_stat_init(net); #if IS_ENABLED(CONFIG_NFS_LOCALIO) spin_lock_init(&nn->local_clients_lock); INIT_LIST_HEAD(&nn->local_clients); #endif return 0; +out_proc_error: + percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); out_repcache_error: nfsd_idmap_shutdown(net); out_idmap_error: @@ -2276,6 +2281,8 @@ static int __init init_nfsd(void) { int retval; + nfsd_debugfs_init(); + retval = nfsd4_init_slabs(); if (retval) return retval; @@ -2286,12 +2293,9 @@ static int __init init_nfsd(void) if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ - retval = create_proc_exports_entry(); - if (retval) - goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_exports; + goto out_free_lockd; retval = register_cld_notifier(); if (retval) goto out_free_subsys; @@ -2300,22 +2304,26 @@ static int __init init_nfsd(void) goto out_free_cld; retval = register_filesystem(&nfsd_fs_type); if (retval) - goto out_free_all; + goto out_free_nfsd4; retval = genl_register_family(&nfsd_nl_family); if (retval) + goto out_free_filesystem; + retval = create_proc_exports_entry(); + if (retval) goto out_free_all; nfsd_localio_ops_init(); return 0; out_free_all: + genl_unregister_family(&nfsd_nl_family); +out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); +out_free_nfsd4: nfsd4_destroy_laundry_wq(); out_free_cld: unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); -out_free_exports: - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); @@ -2323,22 +2331,24 @@ out_free_pnfs: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); + nfsd_debugfs_exit(); return retval; } static void __exit exit_nfsd(void) { + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); genl_unregister_family(&nfsd_nl_family); unregister_filesystem(&nfsd_fs_type); nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); + nfsd_debugfs_exit(); } MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index e2997f0ffbc5..1bfd0b4e9af7 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -44,24 +44,14 @@ bool nfsd_support_version(int vers); #include "stats.h" /* - * Maximum blocksizes supported by daemon under various circumstances. + * Default and maximum payload size (NFS READ or WRITE), in bytes. + * The default is historical, and the maximum is an implementation + * limit. */ -#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD -/* NFSv2 is limited by the protocol specification, see RFC 1094 */ -#define NFSSVC_MAXBLKSIZE_V2 (8*1024) - - -/* - * Largest number of bytes we need to allocate for an NFS - * call or reply. Used to control buffer sizes. We use - * the length of v3 WRITE, READDIR and READDIR replies - * which are an RPC header, up to 26 XDR units of reply - * data, and some page data. - * - * Note that accuracy here doesn't matter too much as the - * size is rounded up to a page size when allocating space. - */ -#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE) +enum { + NFSSVC_DEFBLKSIZE = 1 * 1024 * 1024, + NFSSVC_MAXBLKSIZE = RPCSVC_MAXPAYLOAD, +}; struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ @@ -156,6 +146,16 @@ void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); void nfsd_destroy_serv(struct net *net); +#ifdef CONFIG_DEBUG_FS +void nfsd_debugfs_init(void); +void nfsd_debugfs_exit(void); +#else +static inline void nfsd_debugfs_init(void) {} +static inline void nfsd_debugfs_exit(void) {} +#endif + +extern bool nfsd_disable_splice_read __read_mostly; + extern int nfsd_max_blksize; static inline int nfsd_v4client(struct svc_rqst *rq) diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 876152a91f12..5103c2f4d225 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -267,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1, return true; } -#ifdef CONFIG_CRC32 /** * knfsd_fh_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -279,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } -#else -static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) -{ - return 0; -} -#endif /** * fh_clear_pre_post_attrs - Reset pre/post attributes diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6dda081eb24c..c10fa8128a8a 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -10,6 +10,7 @@ #include "cache.h" #include "xdr.h" #include "vfs.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -54,7 +55,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); + trace_nfsd_vfs_getattr(rqstp, &argp->fh); fh_copy(&resp->fh, &argp->fh); resp->status = fh_verify(rqstp, &resp->fh, 0, @@ -211,7 +212,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->count, argp->offset); - argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); + argp->count = min_t(u32, argp->count, NFS_MAXDATA); argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); resp->pages = rqstp->rq_next_page; @@ -250,17 +251,14 @@ nfsd_proc_write(struct svc_rqst *rqstp) struct nfsd_writeargs *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; unsigned long cnt = argp->len; - unsigned int nvecs; dprintk("nfsd: WRITE %s %u bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, &argp->payload); - - resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), - argp->offset, rqstp->rq_vec, nvecs, - &cnt, NFS_DATA_SYNC, NULL); + fh_copy(&resp->fh, &argp->fh); + resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, + &argp->payload, &cnt, NFS_DATA_SYNC, NULL); if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) @@ -292,9 +290,6 @@ nfsd_proc_create(struct svc_rqst *rqstp) int hosterr; dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); - dprintk("nfsd: CREATE %s %.*s\n", - SVCFH_fmt(dirfhp), argp->len, argp->name); - /* First verify the parent file handle */ resp->status = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC); if (resp->status != nfs_ok) @@ -312,7 +307,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) } inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT); - dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); + dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(argp->name, argp->len), + dirfhp->fh_dentry); if (IS_ERR(dchild)) { resp->status = nfserrno(PTR_ERR(dchild)); goto out_unlock; @@ -331,7 +327,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ resp->status = nfserr_acces; if (!newfhp->fh_dentry) { - printk(KERN_WARNING + printk(KERN_WARNING "nfsd_proc_create: file handle not verified\n"); goto out_unlock; } @@ -445,9 +441,6 @@ nfsd_proc_remove(struct svc_rqst *rqstp) struct nfsd_diropargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh), - argp->len, argp->name); - /* Unlink. -SIFDIR means file must not be a directory */ resp->status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len); @@ -462,11 +455,6 @@ nfsd_proc_rename(struct svc_rqst *rqstp) struct nfsd_renameargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: RENAME %s %.*s -> \n", - SVCFH_fmt(&argp->ffh), argp->flen, argp->fname); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname); - resp->status = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen, &argp->tfh, argp->tname, argp->tlen); fh_put(&argp->ffh); @@ -481,13 +469,6 @@ nfsd_proc_link(struct svc_rqst *rqstp) struct nfsd_linkargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: LINK %s ->\n", - SVCFH_fmt(&argp->ffh)); - dprintk("nfsd: %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - resp->status = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen, &argp->ffh); fh_put(&argp->ffh); @@ -519,10 +500,6 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) goto out; } - dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", - SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, - argp->tlen, argp->tname); - fh_init(&newfh, NFS_FHSIZE); resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, argp->tname, &attrs, &newfh); @@ -548,8 +525,6 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) .na_iattr = &argp->attrs, }; - dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); - if (resp->fh.fh_dentry) { printk(KERN_WARNING "nfsd_proc_mkdir: response already verified??\n"); @@ -578,8 +553,6 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) struct nfsd_diropargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); - resp->status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len); fh_put(&argp->fh); @@ -615,9 +588,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) struct nfsd_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, argp->cookie); + trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd_init_dirlist_pages(rqstp, resp, argp->count); @@ -642,8 +613,6 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_statfsres *resp = rqstp->rq_resp; - dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); - resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, NFSD_MAY_BYPASS_GSS_ON_ROOT); fh_put(&argp->fh); @@ -739,7 +708,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_argzero = sizeof(struct nfsd_readargs), .pc_ressize = sizeof(struct nfsd_readres), .pc_cachetype = RC_NOCACHE, - .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, + .pc_xdrressize = ST+AT+1+NFS_MAXDATA/4, .pc_name = "READ", }, [NFSPROC_WRITECACHE] = { diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9b3d6cff0e1e..82b0111ac469 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -396,13 +396,13 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) if (ret) goto out_filecache; +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_init_umount_work(nn); +#endif ret = nfs4_state_start_net(net); if (ret) goto out_reply_cache; -#ifdef CONFIG_NFSD_V4_2_INTER_SSC - nfsd4_ssc_init_umount_work(nn); -#endif nn->nfsd_net_up = true; return 0; @@ -582,7 +582,7 @@ static int nfsd_get_default_max_blksize(void) */ target >>= 12; - ret = NFSSVC_MAXBLKSIZE; + ret = NFSSVC_DEFBLKSIZE; while (ret > target && ret >= 8*1024*2) ret /= 2; return ret; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 5777f40c7353..fc262ceafca9 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -336,7 +336,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) /* opaque data */ if (xdr_stream_decode_u32(xdr, &args->len) < 0) return false; - if (args->len > NFSSVC_MAXBLKSIZE_V2) + if (args->len > NFS_MAXDATA) return false; return xdr_stream_subsegment(xdr, &args->payload, args->len); @@ -540,7 +540,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) p = xdr_reserve_space(xdr, XDR_UNIT * 5); if (!p) return false; - *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); + *p++ = cpu_to_be32(NFS_MAXDATA); *p++ = cpu_to_be32(stat->f_bsize); *p++ = cpu_to_be32(stat->f_blocks); *p++ = cpu_to_be32(stat->f_bfree); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 74d2d7b42676..1995bca158b8 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -64,15 +64,36 @@ typedef struct { refcount_t cs_count; } copy_stateid_t; +struct nfsd4_referring_call { + struct list_head __list; + + u32 rc_sequenceid; + u32 rc_slotid; +}; + +struct nfsd4_referring_call_list { + struct list_head __list; + + struct nfs4_sessionid rcl_sessionid; + int __nr_referring_calls; + struct list_head rcl_referring_calls; +}; + struct nfsd4_callback { struct nfs4_client *cb_clp; struct rpc_message cb_msg; +#define NFSD4_CALLBACK_RUNNING (0) +#define NFSD4_CALLBACK_WAKE (1) +#define NFSD4_CALLBACK_REQUEUE (2) + unsigned long cb_flags; const struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; int cb_seq_status; int cb_status; int cb_held_slot; - bool cb_need_restart; + + int cb_nr_referring_call_list; + struct list_head cb_referring_call_list; }; struct nfsd4_callback_ops { @@ -162,15 +183,11 @@ struct nfs4_cb_fattr { struct timespec64 ncf_cb_mtime; struct timespec64 ncf_cb_atime; - unsigned long ncf_cb_flags; bool ncf_file_modified; u64 ncf_initial_cinfo; u64 ncf_cur_fsize; }; -/* bits for ncf_cb_flags */ -#define CB_GETATTR_BUSY 0 - /* * Represents a delegation stateid. The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is @@ -198,8 +215,8 @@ struct nfs4_delegation { struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ struct nfs4_clnt_odstate *dl_clnt_odstate; - u32 dl_type; time64_t dl_time; + u32 dl_type; /* For recall: */ int dl_retries; struct nfsd4_callback dl_recall; @@ -261,6 +278,7 @@ struct nfsd4_slot { u32 sl_seqid; __be32 sl_status; struct svc_cred sl_cred; + u32 sl_index; u32 sl_datalen; u16 sl_opcnt; u16 sl_generation; @@ -452,7 +470,6 @@ struct nfs4_client { #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) -#define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; struct workqueue_struct *cl_callback_wq; @@ -498,7 +515,6 @@ struct nfs4_client { struct nfsd4_cb_recall_any *cl_ra; time64_t cl_ra_time; - struct list_head cl_ra_cblist; }; /* struct nfs4_client_reset @@ -777,9 +793,20 @@ extern __be32 nfs4_check_open_reclaim(struct nfs4_client *); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); +extern void nfsd41_cb_referring_call(struct nfsd4_callback *cb, + struct nfs4_sessionid *sessionid, + u32 slotid, u32 seqno); +extern void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); extern bool nfsd4_run_cb(struct nfsd4_callback *cb); + +static inline void nfsd4_try_run_cb(struct nfsd4_callback *cb) +{ + if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags)) + WARN_ON_ONCE(!nfsd4_run_cb(cb)); +} + extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); void nfsd4_async_copy_reaper(struct nfsd_net *nn); diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index bb22893f1157..f7eaf95e20fc 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -73,11 +73,11 @@ static int nfsd_show(struct seq_file *seq, void *v) DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); -void nfsd_proc_stat_init(struct net *net) +struct proc_dir_entry *nfsd_proc_stat_init(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops); + return svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops); } void nfsd_proc_stat_shutdown(struct net *net) diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index 04aacb6c36e2..e4efb0e4e56d 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -10,7 +10,7 @@ #include <uapi/linux/nfsd/stats.h> #include <linux/percpu_counter.h> -void nfsd_proc_stat_init(struct net *net); +struct proc_dir_entry *nfsd_proc_stat_init(struct net *net); void nfsd_proc_stat_shutdown(struct net *net); static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index ad2c0c432d08..3c5505ef5e3a 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -11,6 +11,7 @@ #include <linux/tracepoint.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xprt.h> +#include <trace/misc/fs.h> #include <trace/misc/nfs.h> #include <trace/misc/sunrpc.h> @@ -18,22 +19,40 @@ #include "nfsfh.h" #include "xdr4.h" -#define NFSD_TRACE_PROC_RES_FIELDS \ +#define NFSD_TRACE_PROC_CALL_FIELDS(r) \ + __field(unsigned int, netns_ino) \ + __field(u32, xid) \ + __sockaddr(server, (r)->rq_xprt->xpt_locallen) \ + __sockaddr(client, (r)->rq_xprt->xpt_remotelen) + +#define NFSD_TRACE_PROC_CALL_ASSIGNMENTS(r) \ + do { \ + struct svc_xprt *xprt = (r)->rq_xprt; \ + __entry->netns_ino = SVC_NET(r)->ns.inum; \ + __entry->xid = be32_to_cpu((r)->rq_xid); \ + __assign_sockaddr(server, &xprt->xpt_local, \ + xprt->xpt_locallen); \ + __assign_sockaddr(client, &xprt->xpt_remote, \ + xprt->xpt_remotelen); \ + } while (0) + +#define NFSD_TRACE_PROC_RES_FIELDS(r) \ __field(unsigned int, netns_ino) \ __field(u32, xid) \ __field(unsigned long, status) \ - __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ - __array(unsigned char, client, sizeof(struct sockaddr_in6)) + __sockaddr(server, (r)->rq_xprt->xpt_locallen) \ + __sockaddr(client, (r)->rq_xprt->xpt_remotelen) -#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \ +#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(r, error) \ do { \ - __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ - __entry->xid = be32_to_cpu(rqstp->rq_xid); \ + struct svc_xprt *xprt = (r)->rq_xprt; \ + __entry->netns_ino = SVC_NET(r)->ns.inum; \ + __entry->xid = be32_to_cpu((r)->rq_xid); \ __entry->status = be32_to_cpu(error); \ - memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ - rqstp->rq_xprt->xpt_locallen); \ - memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ - rqstp->rq_xprt->xpt_remotelen); \ + __assign_sockaddr(server, &xprt->xpt_local, \ + xprt->xpt_locallen); \ + __assign_sockaddr(client, &xprt->xpt_remote, \ + xprt->xpt_remotelen); \ } while (0); DECLARE_EVENT_CLASS(nfsd_xdr_err_class, @@ -145,14 +164,14 @@ TRACE_EVENT(nfsd_compound_decode_err, ), TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status), TP_STRUCT__entry( - NFSD_TRACE_PROC_RES_FIELDS + NFSD_TRACE_PROC_RES_FIELDS(rqstp) __field(u32, args_opcnt) __field(u32, resp_opcnt) __field(u32, opnum) ), TP_fast_assign( - NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status) __entry->args_opcnt = args_opcnt; __entry->resp_opcnt = resp_opcnt; @@ -171,12 +190,12 @@ DECLARE_EVENT_CLASS(nfsd_compound_err_class, ), TP_ARGS(rqstp, opnum, status), TP_STRUCT__entry( - NFSD_TRACE_PROC_RES_FIELDS + NFSD_TRACE_PROC_RES_FIELDS(rqstp) __field(u32, opnum) ), TP_fast_assign( - NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status) __entry->opnum = opnum; ), @@ -451,6 +470,8 @@ DEFINE_NFSD_IO_EVENT(write_start); DEFINE_NFSD_IO_EVENT(write_opened); DEFINE_NFSD_IO_EVENT(write_io_done); DEFINE_NFSD_IO_EVENT(write_done); +DEFINE_NFSD_IO_EVENT(commit_start); +DEFINE_NFSD_IO_EVENT(commit_done); DECLARE_EVENT_CLASS(nfsd_err_class, TP_PROTO(struct svc_rqst *rqstp, @@ -803,6 +824,14 @@ DEFINE_EVENT(nfsd_cs_slot_class, nfsd_##name, \ DEFINE_CS_SLOT_EVENT(slot_seqid_conf); DEFINE_CS_SLOT_EVENT(slot_seqid_unconf); +#define show_nfs_slot_flags(val) \ + __print_flags(val, "|", \ + { NFSD4_SLOT_INUSE, "INUSE" }, \ + { NFSD4_SLOT_CACHETHIS, "CACHETHIS" }, \ + { NFSD4_SLOT_INITIALIZED, "INITIALIZED" }, \ + { NFSD4_SLOT_CACHED, "CACHED" }, \ + { NFSD4_SLOT_REUSED, "REUSED" }) + TRACE_EVENT(nfsd_slot_seqid_sequence, TP_PROTO( const struct nfs4_client *clp, @@ -813,10 +842,11 @@ TRACE_EVENT(nfsd_slot_seqid_sequence, TP_STRUCT__entry( __field(u32, seqid) __field(u32, slot_seqid) + __field(u32, slot_index) + __field(unsigned long, slot_flags) __field(u32, cl_boot) __field(u32, cl_id) __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) - __field(bool, in_use) ), TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; @@ -825,11 +855,13 @@ TRACE_EVENT(nfsd_slot_seqid_sequence, clp->cl_cb_conn.cb_addrlen); __entry->seqid = seq->seqid; __entry->slot_seqid = slot->sl_seqid; + __entry->slot_index = seq->slotid; + __entry->slot_flags = slot->sl_flags; ), - TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u (%sin use)", + TP_printk("addr=%pISpc client %08x:%08x idx=%u seqid=%u slot_seqid=%u flags=%s", __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, - __entry->seqid, __entry->slot_seqid, - __entry->in_use ? "" : "not " + __entry->slot_index, __entry->seqid, __entry->slot_seqid, + show_nfs_slot_flags(__entry->slot_flags) ) ); @@ -1039,6 +1071,7 @@ DEFINE_CLID_EVENT(confirmed_r); { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_RECENT, "RECENT" }, \ { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, @@ -1317,6 +1350,7 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_aged); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, @@ -1346,6 +1380,7 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ TP_ARGS(removed, remaining)) DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_recent); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); TRACE_EVENT(nfsd_file_close, @@ -1602,7 +1637,7 @@ DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class, __entry->cl_id = clp->cl_clientid.cl_id; __entry->cb = cb; __entry->opcode = cb->cb_ops ? cb->cb_ops->opcode : _CB_NULL; - __entry->need_restart = cb->cb_need_restart; + __entry->need_restart = test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags); __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, clp->cl_cb_conn.cb_addrlen) ), @@ -2321,6 +2356,259 @@ DEFINE_EVENT(nfsd_copy_async_done_class, \ DEFINE_COPY_ASYNC_DONE_EVENT(done); DEFINE_COPY_ASYNC_DONE_EVENT(cancel); +TRACE_EVENT(nfsd_vfs_setattr, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const struct iattr *iap, + const struct timespec64 *guardtime + ), + TP_ARGS(rqstp, fhp, iap, guardtime), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __field(s64, gtime_tv_sec) + __field(u32, gtime_tv_nsec) + __field(unsigned int, ia_valid) + __field(loff_t, ia_size) + __field(uid_t, ia_uid) + __field(gid_t, ia_gid) + __field(umode_t, ia_mode) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->gtime_tv_sec = guardtime ? guardtime->tv_sec : 0; + __entry->gtime_tv_nsec = guardtime ? guardtime->tv_nsec : 0; + __entry->ia_valid = iap->ia_valid; + __entry->ia_size = iap->ia_size; + __entry->ia_uid = __kuid_val(iap->ia_uid); + __entry->ia_gid = __kgid_val(iap->ia_gid); + __entry->ia_mode = iap->ia_mode; + ), + TP_printk( + "xid=0x%08x fh_hash=0x%08x ia_valid=%s ia_size=%llu ia_mode=0%o ia_uid=%u ia_gid=%u guard_time=%lld.%u", + __entry->xid, __entry->fh_hash, show_ia_valid_flags(__entry->ia_valid), + __entry->ia_size, __entry->ia_mode, __entry->ia_uid, __entry->ia_gid, + __entry->gtime_tv_sec, __entry->gtime_tv_nsec + ) +) + +TRACE_EVENT(nfsd_vfs_lookup, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const char *name, + unsigned int len + ), + TP_ARGS(rqstp, fhp, name, len), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __string_len(name, name, len) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __assign_str(name); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x name=%s", + __entry->xid, __entry->fh_hash, __get_str(name) + ) +); + +TRACE_EVENT(nfsd_vfs_create, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + const char *name, + unsigned int len + ), + TP_ARGS(rqstp, fhp, type, name, len), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __field(umode_t, type) + __string_len(name, name, len) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->type = type; + __assign_str(name); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s name=%s", + __entry->xid, __entry->fh_hash, + show_fs_file_type(__entry->type), __get_str(name) + ) +); + +TRACE_EVENT(nfsd_vfs_symlink, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const char *name, + unsigned int namelen, + const char *target + ), + TP_ARGS(rqstp, fhp, name, namelen, target), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __string_len(name, name, namelen) + __string(target, target) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __assign_str(name); + __assign_str(target); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x name=%s target=%s", + __entry->xid, __entry->fh_hash, + __get_str(name), __get_str(target) + ) +); + +TRACE_EVENT(nfsd_vfs_link, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *sfhp, + const struct svc_fh *tfhp, + const char *name, + unsigned int namelen + ), + TP_ARGS(rqstp, sfhp, tfhp, name, namelen), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, sfh_hash) + __field(u32, tfh_hash) + __string_len(name, name, namelen) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle); + __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle); + __assign_str(name); + ), + TP_printk("xid=0x%08x src_fh=0x%08x tgt_fh=0x%08x name=%s", + __entry->xid, __entry->sfh_hash, __entry->tfh_hash, + __get_str(name) + ) +); + +TRACE_EVENT(nfsd_vfs_unlink, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const char *name, + unsigned int len + ), + TP_ARGS(rqstp, fhp, name, len), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __string_len(name, name, len) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __assign_str(name); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x name=%s", + __entry->xid, __entry->fh_hash, + __get_str(name) + ) +); + +TRACE_EVENT(nfsd_vfs_rename, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *sfhp, + const struct svc_fh *tfhp, + const char *source, + unsigned int sourcelen, + const char *target, + unsigned int targetlen + ), + TP_ARGS(rqstp, sfhp, tfhp, source, sourcelen, target, targetlen), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, sfh_hash) + __field(u32, tfh_hash) + __string_len(source, source, sourcelen) + __string_len(target, target, targetlen) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle); + __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle); + __assign_str(source); + __assign_str(target); + ), + TP_printk("xid=0x%08x sfh_hash=0x%08x tfh_hash=0x%08x source=%s target=%s", + __entry->xid, __entry->sfh_hash, __entry->tfh_hash, + __get_str(source), __get_str(target) + ) +); + +TRACE_EVENT(nfsd_vfs_readdir, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + u32 count, + u64 offset + ), + TP_ARGS(rqstp, fhp, count, offset), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __field(u32, count) + __field(u64, offset) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->count = count; + __entry->offset = offset; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu count=%u", + __entry->xid, __entry->fh_hash, + __entry->offset, __entry->count + ) +); + +DECLARE_EVENT_CLASS(nfsd_vfs_getattr_class, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp + ), + TP_ARGS(rqstp, fhp), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x", + __entry->xid, __entry->fh_hash + ) +); + +#define DEFINE_NFSD_VFS_GETATTR_EVENT(__name) \ +DEFINE_EVENT(nfsd_vfs_getattr_class, __name, \ + TP_PROTO( \ + const struct svc_rqst *rqstp, \ + const struct svc_fh *fhp \ + ), \ + TP_ARGS(rqstp, fhp)) + +DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_getattr); +DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_statfs); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 29cb7b812d71..cd689df2ca5d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -31,6 +31,7 @@ #include <linux/exportfs.h> #include <linux/writeback.h> #include <linux/security.h> +#include <linux/sunrpc/xdr.h> #include "xdr3.h" @@ -47,6 +48,8 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP +bool nfsd_disable_splice_read __read_mostly; + /** * nfserrno - Map Linux errnos to NFS errnos * @errno: POSIX(-ish) error code to be mapped @@ -71,7 +74,6 @@ nfserrno (int errno) { nfserr_acces, -EACCES }, { nfserr_exist, -EEXIST }, { nfserr_xdev, -EXDEV }, - { nfserr_mlink, -EMLINK }, { nfserr_nodev, -ENODEV }, { nfserr_notdir, -ENOTDIR }, { nfserr_isdir, -EISDIR }, @@ -245,7 +247,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry; int host_err; - dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); + trace_nfsd_vfs_lookup(rqstp, fhp, name, len); dparent = fhp->fh_dentry; exp = exp_get(fhp->fh_export); @@ -265,7 +267,8 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - dentry = lookup_one_len_unlocked(name, dparent, len); + dentry = lookup_one_unlocked(&nop_mnt_idmap, + &QSTR_LEN(name, len), dparent); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; @@ -500,6 +503,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, bool size_change = (iap->ia_valid & ATTR_SIZE); int retries; + trace_nfsd_vfs_setattr(rqstp, fhp, iap, guardtime); + if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; ftype = S_IFREG; @@ -923,7 +928,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, * directories, but we never have and it doesn't seem to have * caused anyone a problem. If we were to change this, note * also that our filldir callbacks would need a variant of - * lookup_one_len that doesn't check permissions. + * lookup_one_positive_unlocked() that doesn't check permissions. */ if (type == S_IFREG) may_flags |= NFSD_MAY_OWNER_OVERRIDE; @@ -1082,23 +1087,23 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long v, total; struct iov_iter iter; loff_t ppos = offset; - struct page *page; ssize_t host_err; + size_t len; v = 0; total = *count; while (total) { - page = *(rqstp->rq_next_page++); - rqstp->rq_vec[v].iov_base = page_address(page) + base; - rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base); - total -= rqstp->rq_vec[v].iov_len; + len = min_t(size_t, total, PAGE_SIZE - base); + bvec_set_page(&rqstp->rq_bvec[v], *(rqstp->rq_next_page++), + len, base); + total -= len; ++v; base = 0; } - WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec)); + WARN_ON_ONCE(v > rqstp->rq_maxpages); trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count); + iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count); host_err = vfs_iter_read(file, &iter, &ppos, 0); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } @@ -1140,11 +1145,27 @@ static int wait_for_concurrent_writes(struct file *file) return err; } +/** + * nfsd_vfs_write - write data to an already-open file + * @rqstp: RPC execution context + * @fhp: File handle of file to write into + * @nf: An open file matching @fhp + * @offset: Byte offset of start + * @payload: xdr_buf containing the write payload + * @cnt: IN: number of bytes to write, OUT: number of bytes actually written + * @stable: An NFS stable_how value + * @verf: NFS WRITE verifier + * + * Upon return, caller must invoke fh_put on @fhp. + * + * Return values: + * An nfsstat value in network byte order. + */ __be32 -nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, - loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int stable, - __be32 *verf) +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file *nf, loff_t offset, + const struct xdr_buf *payload, unsigned long *cnt, + int stable, __be32 *verf) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file *file = nf->nf_file; @@ -1159,6 +1180,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, unsigned int pflags = current->flags; rwf_t flags = 0; bool restore_flags = false; + unsigned int nvecs; trace_nfsd_write_opened(rqstp, fhp, offset, *cnt); @@ -1186,7 +1208,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, if (stable && !fhp->fh_use_wgather) flags |= RWF_SYNC; - iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt); + nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload); + iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt); since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); @@ -1237,6 +1260,8 @@ out_nfserr: */ bool nfsd_read_splice_ok(struct svc_rqst *rqstp) { + if (nfsd_disable_splice_read) + return false; switch (svc_auth_flavor(rqstp)) { case RPC_AUTH_GSS_KRB5I: case RPC_AUTH_GSS_KRB5P: @@ -1284,14 +1309,24 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; } -/* - * Write data to a file. - * The stable flag requests synchronous writes. - * N.B. After this call fhp needs an fh_put +/** + * nfsd_write - open a file and write data to it + * @rqstp: RPC execution context + * @fhp: File handle of file to write into; nfsd_write() may modify it + * @offset: Byte offset of start + * @payload: xdr_buf containing the write payload + * @cnt: IN: number of bytes to write, OUT: number of bytes actually written + * @stable: An NFS stable_how value + * @verf: NFS WRITE verifier + * + * Upon return, caller must invoke fh_put on @fhp. + * + * Return values: + * An nfsstat value in network byte order. */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, int stable, + const struct xdr_buf *payload, unsigned long *cnt, int stable, __be32 *verf) { struct nfsd_file *nf; @@ -1303,8 +1338,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, if (err) goto out; - err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, - vlen, cnt, stable, verf); + err = nfsd_vfs_write(rqstp, fhp, nf, offset, payload, cnt, + stable, verf); nfsd_file_put(nf); out: trace_nfsd_write_done(rqstp, fhp, offset, *cnt); @@ -1340,6 +1375,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t start, end; struct nfsd_net *nn; + trace_nfsd_commit_start(rqstp, fhp, offset, count); + /* * Convert the client-provided (offset, count) range to a * (start, end) range. If the client-provided range falls @@ -1378,6 +1415,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, } else nfsd_copy_write_verifier(verf, nn); + trace_nfsd_commit_done(rqstp, fhp, offset, count); return err; } @@ -1461,7 +1499,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, struct inode *dirp; struct iattr *iap = attrs->na_iattr; __be32 err; - int host_err; + int host_err = 0; dentry = fhp->fh_dentry; dirp = d_inode(dentry); @@ -1488,28 +1526,15 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); - if (!host_err && unlikely(d_unhashed(dchild))) { - struct dentry *d; - d = lookup_one_len(dchild->d_name.name, - dchild->d_parent, - dchild->d_name.len); - if (IS_ERR(d)) { - host_err = PTR_ERR(d); - break; - } - if (unlikely(d_is_negative(d))) { - dput(d); - err = nfserr_serverfault; - goto out; - } + dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); + if (IS_ERR(dchild)) { + host_err = PTR_ERR(dchild); + } else if (d_is_negative(dchild)) { + err = nfserr_serverfault; + goto out; + } else if (unlikely(dchild != resfhp->fh_dentry)) { dput(resfhp->fh_dentry); - resfhp->fh_dentry = dget(d); - err = fh_update(resfhp); - dput(dchild); - dchild = d; - if (err) - goto out; + resfhp->fh_dentry = dget(dchild); } break; case S_IFCHR: @@ -1530,7 +1555,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); out: - dput(dchild); + if (!IS_ERR(dchild)) + dput(dchild); return err; out_nfserr: @@ -1553,6 +1579,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err; int host_err; + trace_nfsd_vfs_create(rqstp, fhp, type, fname, flen); + if (isdotent(fname, flen)) return nfserr_exist; @@ -1567,7 +1595,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfserrno(host_err); inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); - dchild = lookup_one_len(fname, dentry, flen); + dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); host_err = PTR_ERR(dchild); if (IS_ERR(dchild)) { err = nfserrno(host_err); @@ -1653,6 +1681,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err, cerr; int host_err; + trace_nfsd_vfs_symlink(rqstp, fhp, fname, flen, path); + err = nfserr_noent; if (!flen || path[0] == '\0') goto out; @@ -1672,7 +1702,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = fhp->fh_dentry; inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); - dnew = lookup_one_len(fname, dentry, flen); + dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); if (IS_ERR(dnew)) { err = nfserrno(PTR_ERR(dnew)); inode_unlock(dentry->d_inode); @@ -1699,9 +1729,17 @@ out: return err; } -/* - * Create a hardlink - * N.B. After this call _both_ ffhp and tfhp need an fh_put +/** + * nfsd_link - create a link + * @rqstp: RPC transaction context + * @ffhp: the file handle of the directory where the new link is to be created + * @name: the filename of the new link + * @len: the length of @name in octets + * @tfhp: the file handle of an existing file object + * + * After this call _both_ ffhp and tfhp need an fh_put. + * + * Returns a generic NFS status code in network byte-order. */ __be32 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, @@ -1709,9 +1747,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, { struct dentry *ddir, *dnew, *dold; struct inode *dirp; + int type; __be32 err; int host_err; + trace_nfsd_vfs_link(rqstp, ffhp, tfhp, name, len); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; @@ -1728,19 +1769,19 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (isdotent(name, len)) goto out; + err = nfs_ok; + type = d_inode(tfhp->fh_dentry)->i_mode & S_IFMT; host_err = fh_want_write(tfhp); - if (host_err) { - err = nfserrno(host_err); + if (host_err) goto out; - } ddir = ffhp->fh_dentry; dirp = d_inode(ddir); inode_lock_nested(dirp, I_MUTEX_PARENT); - dnew = lookup_one_len(name, ddir, len); + dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(name, len), ddir); if (IS_ERR(dnew)) { - err = nfserrno(PTR_ERR(dnew)); + host_err = PTR_ERR(dnew); goto out_unlock; } @@ -1756,17 +1797,26 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, fh_fill_post_attrs(ffhp); inode_unlock(dirp); if (!host_err) { - err = nfserrno(commit_metadata(ffhp)); - if (!err) - err = nfserrno(commit_metadata(tfhp)); - } else { - err = nfserrno(host_err); + host_err = commit_metadata(ffhp); + if (!host_err) + host_err = commit_metadata(tfhp); } + dput(dnew); out_drop_write: fh_drop_write(tfhp); + if (host_err == -EBUSY) { + /* + * See RFC 8881 Section 18.9.4 para 1-2: NFSv4 LINK + * wants a status unique to the object type. + */ + if (type != S_IFDIR) + err = nfserr_file_open; + else + err = nfserr_acces; + } out: - return err; + return err != nfs_ok ? err : nfserrno(host_err); out_dput: dput(dnew); @@ -1795,9 +1845,19 @@ nfsd_has_cached_files(struct dentry *dentry) return ret; } -/* - * Rename a file - * N.B. After this call _both_ ffhp and tfhp need an fh_put +/** + * nfsd_rename - rename a directory entry + * @rqstp: RPC transaction context + * @ffhp: the file handle of parent directory containing the entry to be renamed + * @fname: the filename of directory entry to be renamed + * @flen: the length of @fname in octets + * @tfhp: the file handle of parent directory to contain the renamed entry + * @tname: the filename of the new entry + * @tlen: the length of @tlen in octets + * + * After this call _both_ ffhp and tfhp need an fh_put. + * + * Returns a generic NFS status code in network byte-order. */ __be32 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, @@ -1805,10 +1865,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, { struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; struct inode *fdir, *tdir; + int type = S_IFDIR; __be32 err; int host_err; bool close_cached = false; + trace_nfsd_vfs_rename(rqstp, ffhp, tfhp, fname, flen, tname, tlen); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) goto out; @@ -1851,7 +1914,7 @@ retry: if (err != nfs_ok) goto out_unlock; - odentry = lookup_one_len(fname, fdentry, flen); + odentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), fdentry); host_err = PTR_ERR(odentry); if (IS_ERR(odentry)) goto out_nfserr; @@ -1862,11 +1925,14 @@ retry: host_err = -EINVAL; if (odentry == trap) goto out_dput_old; + type = d_inode(odentry)->i_mode & S_IFMT; - ndentry = lookup_one_len(tname, tdentry, tlen); + ndentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(tname, tlen), tdentry); host_err = PTR_ERR(ndentry); if (IS_ERR(ndentry)) goto out_dput_old; + if (d_inode(ndentry)) + type = d_inode(ndentry)->i_mode & S_IFMT; host_err = -ENOTEMPTY; if (ndentry == trap) goto out_dput_new; @@ -1904,7 +1970,18 @@ retry: out_dput_old: dput(odentry); out_nfserr: - err = nfserrno(host_err); + if (host_err == -EBUSY) { + /* + * See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME + * wants a status unique to the object type. + */ + if (type != S_IFDIR) + err = nfserr_file_open; + else + err = nfserr_acces; + } else { + err = nfserrno(host_err); + } if (!close_cached) { fh_fill_post_attrs(ffhp); @@ -1931,9 +2008,17 @@ out: return err; } -/* - * Unlink a file or directory - * N.B. After this call fhp needs an fh_put +/** + * nfsd_unlink - remove a directory entry + * @rqstp: RPC transaction context + * @fhp: the file handle of the parent directory to be modified + * @type: enforced file type of the object to be removed + * @fname: the name of directory entry to be removed + * @flen: length of @fname in octets + * + * After this call fhp needs an fh_put. + * + * Returns a generic NFS status code in network byte-order. */ __be32 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, @@ -1945,6 +2030,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, __be32 err; int host_err; + trace_nfsd_vfs_unlink(rqstp, fhp, fname, flen); + err = nfserr_acces; if (!flen || isdotent(fname, flen)) goto out; @@ -1960,7 +2047,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dirp = d_inode(dentry); inode_lock_nested(dirp, I_MUTEX_PARENT); - rdentry = lookup_one_len(fname, dentry, flen); + rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) goto out_unlock; @@ -2007,15 +2094,17 @@ out_drop_write: fh_drop_write(fhp); out_nfserr: if (host_err == -EBUSY) { - /* name is mounted-on. There is no perfect - * error status. + /* + * See RFC 8881 Section 18.25.4 para 4: NFSv4 REMOVE + * wants a status unique to the object type. */ - err = nfserr_file_open; - } else { - err = nfserrno(host_err); + if (type != S_IFDIR) + err = nfserr_file_open; + else + err = nfserr_acces; } out: - return err; + return err != nfs_ok ? err : nfserrno(host_err); out_unlock: inode_unlock(dirp); goto out_drop_write; @@ -2231,6 +2320,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in { __be32 err; + trace_nfsd_vfs_statfs(rqstp, fhp); + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); if (!err) { struct path path = { diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index f9b09b842856..eff04959606f 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -128,13 +128,13 @@ bool nfsd_read_splice_ok(struct svc_rqst *rqstp); __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long *count, u32 *eof); -__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, - struct kvec *, int, unsigned long *, - int stable, __be32 *verf); +__be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, const struct xdr_buf *payload, + unsigned long *cnt, int stable, __be32 *verf); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, - int stable, __be32 *verf); + const struct xdr_buf *payload, + unsigned long *cnt, int stable, __be32 *verf); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index c26ba86dbdfd..aa2a356da784 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -676,6 +676,10 @@ struct nfsd4_cb_offload { __be32 co_nfserr; unsigned int co_retries; struct knfsd_fh co_fh; + + struct nfs4_sessionid co_referring_sessionid; + u32 co_referring_slotid; + u32 co_referring_seqno; }; struct nfsd4_copy { diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index f1a315cd31b7..f4e29c0c701c 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -6,8 +6,11 @@ #define cb_compound_enc_hdr_sz 4 #define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) #define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define enc_referring_call4_sz (1 + 1) +#define enc_referring_call_list4_sz (sessionid_sz + 1 + \ + enc_referring_call4_sz) #define cb_sequence_enc_sz (sessionid_sz + 4 + \ - 1 /* no referring calls list yet */) + enc_referring_call_list4_sz) #define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) #define op_enc_sz 1 |