summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-10-06 13:22:21 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-10-06 13:22:21 -0700
commit81538c8e42806eed71ce125723877a7c2307370c (patch)
tree71abd849602e6a97e156d17bb5d93f27ba74cbe2
parent256e3417065b2721f77bcd37331796b59483ef3b (diff)
parent73cc6ec1a89a6c443a77b9b93ddcea63b7cea223 (diff)
Merge tag 'nfsd-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever: "Mike Snitzer has prototyped a mechanism for disabling I/O caching in NFSD. This is introduced in v6.18 as an experimental feature. This enables scaling NFSD in /both/ directions: - NFS service can be supported on systems with small memory footprints, such as low-cost cloud instances - Large NFS workloads will be less likely to force the eviction of server-local activity, helping it avoid thrashing Jeff Layton contributed a number of fixes to the new attribute delegation implementation (based on a pending Internet RFC) that we hope will make attribute delegation reliable enough to enable by default, as it is on the Linux NFS client. The remaining patches in this pull request are clean-ups and minor optimizations. Many thanks to the contributors, reviewers, testers, and bug reporters who participated during the v6.18 NFSD development cycle" * tag 'nfsd-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (42 commits) nfsd: discard nfserr_dropit SUNRPC: Make RPCSEC_GSS_KRB5 select CRYPTO instead of depending on it NFSD: Add io_cache_{read,write} controls to debugfs NFSD: Do the grace period check in ->proc_layoutget nfsd: delete unnecessary NULL check in __fh_verify() NFSD: Allow layoutcommit during grace period NFSD: Disallow layoutget during grace period sunrpc: fix "occurence"->"occurrence" nfsd: Don't force CRYPTO_LIB_SHA256 to be built-in nfsd: nfserr_jukebox in nlm_fopen should lead to a retry NFSD: Reduce DRC bucket size NFSD: Delay adding new entries to LRU SUNRPC: Move the svc_rpcb_cleanup() call sites NFS: Remove rpcbind cleanup for NFSv4.0 callback nfsd: unregister with rpcbind when deleting a transport NFSD: Drop redundant conversion to bool sunrpc: eliminate return pointer in svc_tcp_sendmsg() sunrpc: fix pr_notice in svc_tcp_sendto() to show correct length nfsd: decouple the xprtsec policy check from check_nfsd_access() NFSD: Fix destination buffer size in nfsd4_ssc_setup_dul() ...
-rw-r--r--fs/attr.c44
-rw-r--r--fs/lockd/svclock.c2
-rw-r--r--fs/nfsd/Kconfig2
-rw-r--r--fs/nfsd/blocklayout.c32
-rw-r--r--fs/nfsd/blocklayoutxdr.c86
-rw-r--r--fs/nfsd/blocklayoutxdr.h4
-rw-r--r--fs/nfsd/debugfs.c95
-rw-r--r--fs/nfsd/export.c82
-rw-r--r--fs/nfsd/export.h3
-rw-r--r--fs/nfsd/filecache.c21
-rw-r--r--fs/nfsd/filecache.h1
-rw-r--r--fs/nfsd/flexfilelayout.c4
-rw-r--r--fs/nfsd/flexfilelayoutxdr.c3
-rw-r--r--fs/nfsd/localio.c1
-rw-r--r--fs/nfsd/lockd.c15
-rw-r--r--fs/nfsd/nfs4layouts.c1
-rw-r--r--fs/nfsd/nfs4proc.c125
-rw-r--r--fs/nfsd/nfs4recover.c31
-rw-r--r--fs/nfsd/nfs4state.c86
-rw-r--r--fs/nfsd/nfs4xdr.c32
-rw-r--r--fs/nfsd/nfscache.c15
-rw-r--r--fs/nfsd/nfsd.h17
-rw-r--r--fs/nfsd/nfsfh.c55
-rw-r--r--fs/nfsd/nfsfh.h38
-rw-r--r--fs/nfsd/pnfs.h5
-rw-r--r--fs/nfsd/state.h16
-rw-r--r--fs/nfsd/vfs.c23
-rw-r--r--fs/nfsd/vfs.h37
-rw-r--r--fs/nfsd/xdr4.h39
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/nfslocalio.h1
-rw-r--r--include/linux/sunrpc/svc_xprt.h3
-rw-r--r--include/linux/sunrpc/xdr.h4
-rw-r--r--net/sunrpc/Kconfig3
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/svc.c17
-rw-r--r--net/sunrpc/svc_xprt.c13
-rw-r--r--net/sunrpc/svcsock.c25
-rw-r--r--net/sunrpc/sysfs.c2
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j22
40 files changed, 664 insertions, 324 deletions
diff --git a/fs/attr.c b/fs/attr.c
index 5425c1dbbff9..795f231d00e8 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -286,20 +286,12 @@ static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
unsigned int ia_valid = attr->ia_valid;
struct timespec64 now;
- if (ia_valid & ATTR_CTIME) {
- /*
- * In the case of an update for a write delegation, we must respect
- * the value in ia_ctime and not use the current time.
- */
- if (ia_valid & ATTR_DELEG)
- now = inode_set_ctime_deleg(inode, attr->ia_ctime);
- else
- now = inode_set_ctime_current(inode);
- } else {
- /* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
- WARN_ON_ONCE(ia_valid & ATTR_MTIME);
+ if (ia_valid & ATTR_CTIME_SET)
+ now = inode_set_ctime_deleg(inode, attr->ia_ctime);
+ else if (ia_valid & ATTR_CTIME)
+ now = inode_set_ctime_current(inode);
+ else
now = current_time(inode);
- }
if (ia_valid & ATTR_ATIME_SET)
inode_set_atime_to_ts(inode, attr->ia_atime);
@@ -359,12 +351,11 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
inode_set_atime_to_ts(inode, attr->ia_atime);
if (ia_valid & ATTR_MTIME)
inode_set_mtime_to_ts(inode, attr->ia_mtime);
- if (ia_valid & ATTR_CTIME) {
- if (ia_valid & ATTR_DELEG)
- inode_set_ctime_deleg(inode, attr->ia_ctime);
- else
- inode_set_ctime_to_ts(inode, attr->ia_ctime);
- }
+
+ if (ia_valid & ATTR_CTIME_SET)
+ inode_set_ctime_deleg(inode, attr->ia_ctime);
+ else if (ia_valid & ATTR_CTIME)
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
}
EXPORT_SYMBOL(setattr_copy);
@@ -463,15 +454,18 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
now = current_time(inode);
- attr->ia_ctime = now;
- if (!(ia_valid & ATTR_ATIME_SET))
- attr->ia_atime = now;
- else
+ if (ia_valid & ATTR_ATIME_SET)
attr->ia_atime = timestamp_truncate(attr->ia_atime, inode);
- if (!(ia_valid & ATTR_MTIME_SET))
- attr->ia_mtime = now;
else
+ attr->ia_atime = now;
+ if (ia_valid & ATTR_CTIME_SET)
+ attr->ia_ctime = timestamp_truncate(attr->ia_ctime, inode);
+ else
+ attr->ia_ctime = now;
+ if (ia_valid & ATTR_MTIME_SET)
attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode);
+ else
+ attr->ia_mtime = now;
if (ia_valid & ATTR_KILL_PRIV) {
error = security_inode_need_killpriv(dentry);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c1315df4b350..a31dc9588eb8 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -980,7 +980,7 @@ nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
struct file_lock *fl;
int error;
- dprintk("grant_reply: looking for cookie %x, s=%d \n",
+ dprintk("grant_reply: looking for cookie %x, s=%d\n",
*(unsigned int *)(cookie->data), status);
if (!(block = nlmsvc_find_block(cookie)))
return;
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 879e0b104d1c..e134dce45e35 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -5,6 +5,7 @@ config NFSD
depends on FILE_LOCKING
depends on FSNOTIFY
select CRC32
+ select CRYPTO_LIB_SHA256 if NFSD_V4
select LOCKD
select SUNRPC
select EXPORTFS
@@ -77,7 +78,6 @@ config NFSD_V4
select FS_POSIX_ACL
select RPCSEC_GSS_KRB5
select CRYPTO
- select CRYPTO_LIB_SHA256
select CRYPTO_MD5
select GRACE_PERIOD
select NFS_V4_2_SSC_HELPER if NFS_V4_2
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 19078a043e85..fde5539cf6a6 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -18,8 +18,8 @@
static __be32
-nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
- struct nfsd4_layoutget *args)
+nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
+ const struct svc_fh *fhp, struct nfsd4_layoutget *args)
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
struct super_block *sb = inode->i_sb;
@@ -29,6 +29,9 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
u32 device_generation = 0;
int error;
+ if (locks_in_grace(SVC_NET(rqstp)))
+ return nfserr_grace;
+
if (seg->offset & (block_size - 1)) {
dprintk("pnfsd: I/O misaligned\n");
goto out_layoutunavailable;
@@ -118,7 +121,6 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
struct iomap *iomaps, int nr_iomaps)
{
struct timespec64 mtime = inode_get_mtime(inode);
- loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
int error;
@@ -128,9 +130,9 @@ nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
- if (new_size > i_size_read(inode)) {
+ if (lcp->lc_size_chg) {
iattr.ia_valid |= ATTR_SIZE;
- iattr.ia_size = new_size;
+ iattr.ia_size = lcp->lc_newsize;
}
error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
@@ -173,16 +175,18 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
}
static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
+nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
struct nfsd4_layoutcommit *lcp)
{
struct iomap *iomaps;
int nr_iomaps;
__be32 nfserr;
- nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, &nr_iomaps,
- i_blocksize(inode));
+ rqstp->rq_arg = lcp->lc_up_layout;
+ svcxdr_init_decode(rqstp);
+
+ nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
+ &iomaps, &nr_iomaps, i_blocksize(inode));
if (nfserr != nfs_ok)
return nfserr;
@@ -313,16 +317,18 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
}
static __be32
-nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
struct nfsd4_layoutcommit *lcp)
{
struct iomap *iomaps;
int nr_iomaps;
__be32 nfserr;
- nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, &nr_iomaps,
- i_blocksize(inode));
+ rqstp->rq_arg = lcp->lc_up_layout;
+ svcxdr_init_decode(rqstp);
+
+ nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
+ &iomaps, &nr_iomaps, i_blocksize(inode));
if (nfserr != nfs_ok)
return nfserr;
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index bcf21fde9120..e50afe340737 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -29,8 +29,7 @@ nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
*p++ = cpu_to_be32(len);
*p++ = cpu_to_be32(1); /* we always return a single extent */
- p = xdr_encode_opaque_fixed(p, &b->vol_id,
- sizeof(struct nfsd4_deviceid));
+ p = svcxdr_encode_deviceid4(p, &b->vol_id);
p = xdr_encode_hyper(p, b->foff);
p = xdr_encode_hyper(p, b->len);
p = xdr_encode_hyper(p, b->soff);
@@ -114,8 +113,7 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
/**
* nfsd4_block_decode_layoutupdate - decode the block layout extent array
- * @p: pointer to the xdr data
- * @len: number of bytes to decode
+ * @xdr: subbuf set to the encoded array
* @iomapp: pointer to store the decoded extent array
* @nr_iomapsp: pointer to store the number of extents
* @block_size: alignment of extent offset and length
@@ -128,25 +126,24 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
*
* Return values:
* %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
- * %nfserr_bad_xdr: The encoded array in @p is invalid
+ * %nfserr_bad_xdr: The encoded array in @xdr is invalid
* %nfserr_inval: An unaligned extent found
* %nfserr_delay: Failed to allocate memory for @iomapp
*/
__be32
-nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
int *nr_iomapsp, u32 block_size)
{
struct iomap *iomaps;
- u32 nr_iomaps, i;
+ u32 nr_iomaps, expected, len, i;
+ __be32 nfserr;
- if (len < sizeof(u32))
- return nfserr_bad_xdr;
- len -= sizeof(u32);
- if (len % PNFS_BLOCK_EXTENT_SIZE)
+ if (xdr_stream_decode_u32(xdr, &nr_iomaps))
return nfserr_bad_xdr;
- nr_iomaps = be32_to_cpup(p++);
- if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE)
+ len = sizeof(__be32) + xdr_stream_remaining(xdr);
+ expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
+ if (len != expected)
return nfserr_bad_xdr;
iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
@@ -156,23 +153,44 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
for (i = 0; i < nr_iomaps; i++) {
struct pnfs_block_extent bex;
- memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid));
- p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid));
+ if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) {
+ nfserr = nfserr_bad_xdr;
+ goto fail;
+ }
- p = xdr_decode_hyper(p, &bex.foff);
+ if (xdr_stream_decode_u64(xdr, &bex.foff)) {
+ nfserr = nfserr_bad_xdr;
+ goto fail;
+ }
if (bex.foff & (block_size - 1)) {
+ nfserr = nfserr_inval;
+ goto fail;
+ }
+
+ if (xdr_stream_decode_u64(xdr, &bex.len)) {
+ nfserr = nfserr_bad_xdr;
goto fail;
}
- p = xdr_decode_hyper(p, &bex.len);
if (bex.len & (block_size - 1)) {
+ nfserr = nfserr_inval;
+ goto fail;
+ }
+
+ if (xdr_stream_decode_u64(xdr, &bex.soff)) {
+ nfserr = nfserr_bad_xdr;
goto fail;
}
- p = xdr_decode_hyper(p, &bex.soff);
if (bex.soff & (block_size - 1)) {
+ nfserr = nfserr_inval;
+ goto fail;
+ }
+
+ if (xdr_stream_decode_u32(xdr, &bex.es)) {
+ nfserr = nfserr_bad_xdr;
goto fail;
}
- bex.es = be32_to_cpup(p++);
if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
+ nfserr = nfserr_inval;
goto fail;
}
@@ -185,13 +203,12 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
return nfs_ok;
fail:
kfree(iomaps);
- return nfserr_inval;
+ return nfserr;
}
/**
* nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
- * @p: pointer to the xdr data
- * @len: number of bytes to decode
+ * @xdr: subbuf set to the encoded array
* @iomapp: pointer to store the decoded extent array
* @nr_iomapsp: pointer to store the number of extents
* @block_size: alignment of extent offset and length
@@ -203,21 +220,22 @@ fail:
*
* Return values:
* %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
- * %nfserr_bad_xdr: The encoded array in @p is invalid
+ * %nfserr_bad_xdr: The encoded array in @xdr is invalid
* %nfserr_inval: An unaligned extent found
* %nfserr_delay: Failed to allocate memory for @iomapp
*/
__be32
-nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
int *nr_iomapsp, u32 block_size)
{
struct iomap *iomaps;
- u32 nr_iomaps, expected, i;
+ u32 nr_iomaps, expected, len, i;
+ __be32 nfserr;
- if (len < sizeof(u32))
+ if (xdr_stream_decode_u32(xdr, &nr_iomaps))
return nfserr_bad_xdr;
- nr_iomaps = be32_to_cpup(p++);
+ len = sizeof(__be32) + xdr_stream_remaining(xdr);
expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
if (len != expected)
return nfserr_bad_xdr;
@@ -229,14 +247,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
for (i = 0; i < nr_iomaps; i++) {
u64 val;
- p = xdr_decode_hyper(p, &val);
+ if (xdr_stream_decode_u64(xdr, &val)) {
+ nfserr = nfserr_bad_xdr;
+ goto fail;
+ }
if (val & (block_size - 1)) {
+ nfserr = nfserr_inval;
goto fail;
}
iomaps[i].offset = val;
- p = xdr_decode_hyper(p, &val);
+ if (xdr_stream_decode_u64(xdr, &val)) {
+ nfserr = nfserr_bad_xdr;
+ goto fail;
+ }
if (val & (block_size - 1)) {
+ nfserr = nfserr_inval;
goto fail;
}
iomaps[i].length = val;
@@ -247,5 +273,5 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
return nfs_ok;
fail:
kfree(iomaps);
- return nfserr_inval;
+ return nfserr;
}
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index 15b3569f3d9a..7d25ef689671 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
const struct nfsd4_getdeviceinfo *gdp);
__be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
const struct nfsd4_layoutget *lgp);
-__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len,
+__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr,
struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
-__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len,
+__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr,
struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
#endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c
index 84b0c8b559dc..ed2b9e066206 100644
--- a/fs/nfsd/debugfs.c
+++ b/fs/nfsd/debugfs.c
@@ -26,12 +26,99 @@ static int nfsd_dsr_get(void *data, u64 *val)
static int nfsd_dsr_set(void *data, u64 val)
{
- nfsd_disable_splice_read = (val > 0) ? true : false;
+ nfsd_disable_splice_read = (val > 0);
+ if (!nfsd_disable_splice_read) {
+ /*
+ * Must use buffered I/O if splice_read is enabled.
+ */
+ nfsd_io_cache_read = NFSD_IO_BUFFERED;
+ }
return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n");
+/*
+ * /sys/kernel/debug/nfsd/io_cache_read
+ *
+ * Contents:
+ * %0: NFS READ will use buffered IO
+ * %1: NFS READ will use dontcache (buffered IO w/ dropbehind)
+ *
+ * This setting takes immediate effect for all NFS versions,
+ * all exports, and in all NFSD net namespaces.
+ */
+
+static int nfsd_io_cache_read_get(void *data, u64 *val)
+{
+ *val = nfsd_io_cache_read;
+ return 0;
+}
+
+static int nfsd_io_cache_read_set(void *data, u64 val)
+{
+ int ret = 0;
+
+ switch (val) {
+ case NFSD_IO_BUFFERED:
+ nfsd_io_cache_read = NFSD_IO_BUFFERED;
+ break;
+ case NFSD_IO_DONTCACHE:
+ /*
+ * Must disable splice_read when enabling
+ * NFSD_IO_DONTCACHE.
+ */
+ nfsd_disable_splice_read = true;
+ nfsd_io_cache_read = val;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_read_fops, nfsd_io_cache_read_get,
+ nfsd_io_cache_read_set, "%llu\n");
+
+/*
+ * /sys/kernel/debug/nfsd/io_cache_write
+ *
+ * Contents:
+ * %0: NFS WRITE will use buffered IO
+ * %1: NFS WRITE will use dontcache (buffered IO w/ dropbehind)
+ *
+ * This setting takes immediate effect for all NFS versions,
+ * all exports, and in all NFSD net namespaces.
+ */
+
+static int nfsd_io_cache_write_get(void *data, u64 *val)
+{
+ *val = nfsd_io_cache_write;
+ return 0;
+}
+
+static int nfsd_io_cache_write_set(void *data, u64 val)
+{
+ int ret = 0;
+
+ switch (val) {
+ case NFSD_IO_BUFFERED:
+ case NFSD_IO_DONTCACHE:
+ nfsd_io_cache_write = val;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_write_fops, nfsd_io_cache_write_get,
+ nfsd_io_cache_write_set, "%llu\n");
+
void nfsd_debugfs_exit(void)
{
debugfs_remove_recursive(nfsd_top_dir);
@@ -44,4 +131,10 @@ void nfsd_debugfs_init(void)
debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO,
nfsd_top_dir, NULL, &nfsd_dsr_fops);
+
+ debugfs_create_file("io_cache_read", 0644, nfsd_top_dir, NULL,
+ &nfsd_io_cache_read_fops);
+
+ debugfs_create_file("io_cache_write", 0644, nfsd_top_dir, NULL,
+ &nfsd_io_cache_write_fops);
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index caa695c06efb..9d55512d0cc9 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1082,50 +1082,62 @@ static struct svc_export *exp_find(struct cache_detail *cd,
}
/**
- * check_nfsd_access - check if access to export is allowed.
+ * check_xprtsec_policy - check if access to export is allowed by the
+ * xprtsec policy
* @exp: svc_export that is being accessed.
- * @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO).
- * @may_bypass_gss: reduce strictness of authorization check
+ * @rqstp: svc_rqst attempting to access @exp.
+ *
+ * Helper function for check_nfsd_access(). Note that callers should be
+ * using check_nfsd_access() instead of calling this function directly. The
+ * one exception is __fh_verify() since it has logic that may result in one
+ * or both of the helpers being skipped.
*
* Return values:
* %nfs_ok if access is granted, or
* %nfserr_wrongsec if access is denied
*/
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
- bool may_bypass_gss)
+__be32 check_xprtsec_policy(struct svc_export *exp, struct svc_rqst *rqstp)
{
- struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
- struct svc_xprt *xprt;
-
- /*
- * If rqstp is NULL, this is a LOCALIO request which will only
- * ever use a filehandle/credential pair for which access has
- * been affirmed (by ACCESS or OPEN NFS requests) over the
- * wire. So there is no need for further checks here.
- */
- if (!rqstp)
- return nfs_ok;
-
- xprt = rqstp->rq_xprt;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
!test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
- goto ok;
+ return nfs_ok;
}
- if (!may_bypass_gss)
- goto denied;
+ return nfserr_wrongsec;
+}
+
+/**
+ * check_security_flavor - check if access to export is allowed by the
+ * security flavor
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Helper function for check_nfsd_access(). Note that callers should be
+ * using check_nfsd_access() instead of calling this function directly. The
+ * one exception is __fh_verify() since it has logic that may result in one
+ * or both of the helpers being skipped.
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_security_flavor(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
+{
+ struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
-ok:
/* legacy gss-only clients are always OK: */
if (exp->ex_client == rqstp->rq_gssclient)
return nfs_ok;
@@ -1167,10 +1179,30 @@ ok:
}
}
-denied:
return nfserr_wrongsec;
}
+/**
+ * check_nfsd_access - check if access to export is allowed.
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
+{
+ __be32 status;
+
+ status = check_xprtsec_policy(exp, rqstp);
+ if (status != nfs_ok)
+ return status;
+ return check_security_flavor(exp, rqstp, may_bypass_gss);
+}
+
/*
* Uses rq_client and rq_gssclient to find an export; uses rq_client (an
* auth_unix client) if it's available and has secinfo information;
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index cb36e6cce829..d2b09cd76145 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -101,6 +101,9 @@ struct svc_expkey {
struct svc_cred;
int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp);
+__be32 check_xprtsec_policy(struct svc_export *exp, struct svc_rqst *rqstp);
+__be32 check_security_flavor(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss);
__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
bool may_bypass_gss);
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index e010d90aeb27..a238b6725008 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -395,27 +395,6 @@ nfsd_file_put_local(struct nfsd_file __rcu **pnf)
}
/**
- * nfsd_file_get_local - get nfsd_file reference and reference to net
- * @nf: nfsd_file of which to put the reference
- *
- * Get reference to both the nfsd_file and nf->nf_net.
- */
-struct nfsd_file *
-nfsd_file_get_local(struct nfsd_file *nf)
-{
- struct net *net = nf->nf_net;
-
- if (nfsd_net_try_get(net)) {
- nf = nfsd_file_get(nf);
- if (!nf)
- nfsd_net_put(net);
- } else {
- nf = NULL;
- }
- return nf;
-}
-
-/**
* nfsd_file_file - get the backing file of an nfsd_file
* @nf: nfsd_file of which to access the backing file.
*
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 237a05c74211..e3d6ca2b6030 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -67,7 +67,6 @@ int nfsd_file_cache_start_net(struct net *net);
void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf);
-struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
struct file *nfsd_file_file(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index 3ca5304440ff..c318cf74e388 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -20,8 +20,8 @@
#define NFSDDBG_FACILITY NFSDDBG_PNFS
static __be32
-nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
- struct nfsd4_layoutget *args)
+nfsd4_ff_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
+ const struct svc_fh *fhp, struct nfsd4_layoutget *args)
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
u32 device_generation = 0;
diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c
index aeb71c10ff1b..f9f7e38cba13 100644
--- a/fs/nfsd/flexfilelayoutxdr.c
+++ b/fs/nfsd/flexfilelayoutxdr.c
@@ -54,8 +54,7 @@ nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
*p++ = cpu_to_be32(1); /* single mirror */
*p++ = cpu_to_be32(1); /* single data server */
- p = xdr_encode_opaque_fixed(p, &fl->deviceid,
- sizeof(struct nfsd4_deviceid));
+ p = svcxdr_encode_deviceid4(p, &fl->deviceid);
*p++ = cpu_to_be32(1); /* efficiency */
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index 9e0a37cd29d8..be710d809a3b 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -132,7 +132,6 @@ static const struct nfsd_localio_operations nfsd_localio_ops = {
.nfsd_net_put = nfsd_net_put,
.nfsd_open_local_fh = nfsd_open_local_fh,
.nfsd_file_put_local = nfsd_file_put_local,
- .nfsd_file_get_local = nfsd_file_get_local,
.nfsd_file_file = nfsd_file_file,
.nfsd_file_dio_alignment = nfsd_file_dio_alignment,
};
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index edc9f75dc75c..c774ce9aa296 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -57,7 +57,20 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
switch (nfserr) {
case nfs_ok:
return 0;
- case nfserr_dropit:
+ case nfserr_jukebox:
+ /* this error can indicate a presence of a conflicting
+ * delegation to an NLM lock request. Options are:
+ * (1) For now, drop this request and make the client
+ * retry. When delegation is returned, client's lock retry
+ * will complete.
+ * (2) NLM4_DENIED as per "spec" signals to the client
+ * that the lock is unavailable now but client can retry.
+ * Linux client implementation does not. It treats
+ * NLM4_DENIED same as NLM4_FAILED and errors the request.
+ * (3) For the future, treat this as blocked lock and try
+ * to callback when the delegation is returned but might
+ * not have a proper lock request to block on.
+ */
return nlm_drop_reply;
case nfserr_stale:
return nlm_stale_fh;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index aea905fcaf87..683bd1130afe 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -120,7 +120,6 @@ nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
id->fsid_idx = fhp->fh_export->ex_devid_map->idx;
id->generation = device_generation;
- id->pad = 0;
return 0;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 71b428efcbb5..e466cf52d7d7 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1133,6 +1133,35 @@ nfsd4_secinfo_no_name_release(union nfsd4_op_u *u)
exp_put(u->secinfo_no_name.sin_exp);
}
+/*
+ * Validate that the requested timestamps are within the acceptable range. If
+ * timestamp appears to be in the future, then it will be clamped to
+ * current_time().
+ */
+static void
+vet_deleg_attrs(struct nfsd4_setattr *setattr, struct nfs4_delegation *dp)
+{
+ struct timespec64 now = current_time(dp->dl_stid.sc_file->fi_inode);
+ struct iattr *iattr = &setattr->sa_iattr;
+
+ if ((setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) &&
+ !nfsd4_vet_deleg_time(&iattr->ia_atime, &dp->dl_atime, &now))
+ iattr->ia_valid &= ~(ATTR_ATIME | ATTR_ATIME_SET);
+
+ if (setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+ if (nfsd4_vet_deleg_time(&iattr->ia_mtime, &dp->dl_mtime, &now)) {
+ iattr->ia_ctime = iattr->ia_mtime;
+ if (nfsd4_vet_deleg_time(&iattr->ia_ctime, &dp->dl_ctime, &now))
+ dp->dl_setattr = true;
+ else
+ iattr->ia_valid &= ~(ATTR_CTIME | ATTR_CTIME_SET);
+ } else {
+ iattr->ia_valid &= ~(ATTR_CTIME | ATTR_CTIME_SET |
+ ATTR_MTIME | ATTR_MTIME_SET);
+ }
+ }
+}
+
static __be32
nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -1170,8 +1199,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_delegation *dp = delegstateid(st);
/* Only for *_ATTRS_DELEG flavors */
- if (deleg_attrs_deleg(dp->dl_type))
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ vet_deleg_attrs(setattr, dp);
status = nfs_ok;
+ }
}
}
if (st)
@@ -1209,12 +1240,26 @@ out:
return status;
}
+static void nfsd4_file_mark_deleg_written(struct nfs4_file *fi)
+{
+ spin_lock(&fi->fi_lock);
+ if (!list_empty(&fi->fi_delegations)) {
+ struct nfs4_delegation *dp = list_first_entry(&fi->fi_delegations,
+ struct nfs4_delegation, dl_perfile);
+
+ if (dp->dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG)
+ dp->dl_written = true;
+ }
+ spin_unlock(&fi->fi_lock);
+}
+
static __be32
nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_write *write = &u->write;
stateid_t *stateid = &write->wr_stateid;
+ struct nfs4_stid *stid = NULL;
struct nfsd_file *nf = NULL;
__be32 status = nfs_ok;
unsigned long cnt;
@@ -1227,10 +1272,15 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
trace_nfsd_write_start(rqstp, &cstate->current_fh,
write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
- stateid, WR_STATE, &nf, NULL);
+ stateid, WR_STATE, &nf, &stid);
if (status)
return status;
+ if (stid) {
+ nfsd4_file_mark_deleg_written(stid->sc_file);
+ nfs4_put_stid(stid);
+ }
+
write->wr_how_written = write->wr_stable_how;
status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
write->wr_offset, &write->wr_payload,
@@ -1469,7 +1519,7 @@ try_again:
return 0;
}
if (work) {
- strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1);
+ strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
refcount_set(&work->nsui_refcnt, 2);
work->nsui_busy = true;
list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
@@ -2447,7 +2497,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls))
goto out_put_stid;
- nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
+ nfserr = ops->proc_layoutget(rqstp, d_inode(current_fh->fh_dentry),
current_fh, lgp);
if (nfserr)
goto out_put_stid;
@@ -2471,11 +2521,11 @@ static __be32
nfsd4_layoutcommit(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
{
+ struct net *net = SVC_NET(rqstp);
struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
struct svc_fh *current_fh = &cstate->current_fh;
const struct nfsd4_layout_ops *ops;
- loff_t new_size = lcp->lc_last_wr + 1;
struct inode *inode;
struct nfs4_layout_stateid *ls;
__be32 nfserr;
@@ -2491,43 +2541,50 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
goto out;
inode = d_inode(current_fh->fh_dentry);
- nfserr = nfserr_inval;
- if (new_size <= seg->offset) {
- dprintk("pnfsd: last write before layout segment\n");
- goto out;
+ lcp->lc_size_chg = false;
+ if (lcp->lc_newoffset) {
+ loff_t new_size = lcp->lc_last_wr + 1;
+
+ nfserr = nfserr_inval;
+ if (new_size <= seg->offset)
+ goto out;
+ if (new_size > seg->offset + seg->length)
+ goto out;
+
+ if (new_size > i_size_read(inode)) {
+ lcp->lc_size_chg = true;
+ lcp->lc_newsize = new_size;
+ }
}
- if (new_size > seg->offset + seg->length) {
- dprintk("pnfsd: last write beyond layout segment\n");
+
+ nfserr = nfserr_grace;
+ if (locks_in_grace(net) && !lcp->lc_reclaim)
goto out;
- }
- if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
- dprintk("pnfsd: layoutcommit beyond EOF\n");
+ nfserr = nfserr_no_grace;
+ if (!locks_in_grace(net) && lcp->lc_reclaim)
goto out;
- }
- nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
- false, lcp->lc_layout_type,
- &ls);
- if (nfserr) {
- trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid);
- /* fixup error code as per RFC5661 */
- if (nfserr == nfserr_bad_stateid)
- nfserr = nfserr_badlayout;
- goto out;
+ if (!lcp->lc_reclaim) {
+ nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate,
+ &lcp->lc_sid, false, lcp->lc_layout_type, &ls);
+ if (nfserr) {
+ trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid);
+ /* fixup error code as per RFC5661 */
+ if (nfserr == nfserr_bad_stateid)
+ nfserr = nfserr_badlayout;
+ goto out;
+ }
+
+ /* LAYOUTCOMMIT does not require any serialization */
+ mutex_unlock(&ls->ls_mutex);
}
- /* LAYOUTCOMMIT does not require any serialization */
- mutex_unlock(&ls->ls_mutex);
+ nfserr = ops->proc_layoutcommit(inode, rqstp, lcp);
- if (new_size > i_size_read(inode)) {
- lcp->lc_size_chg = true;
- lcp->lc_newsize = new_size;
- } else {
- lcp->lc_size_chg = false;
+ if (!lcp->lc_reclaim) {
+ nfsd4_file_mark_deleg_written(ls->ls_stid.sc_file);
+ nfs4_put_stid(&ls->ls_stid);
}
-
- nfserr = ops->proc_layoutcommit(inode, lcp);
- nfs4_put_stid(&ls->ls_stid);
out:
return nfserr;
}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2231192ec33f..e2b9472e5c78 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -92,24 +92,10 @@ nfs4_reset_creds(const struct cred *original)
put_cred(revert_creds(original));
}
-static void
-md5_to_hex(char *out, char *md5)
-{
- int i;
-
- for (i=0; i<16; i++) {
- unsigned char c = md5[i];
-
- *out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
- *out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
- }
- *out = '\0';
-}
-
static int
-nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
+nfs4_make_rec_clidname(char dname[HEXDIR_LEN], const struct xdr_netobj *clname)
{
- struct xdr_netobj cksum;
+ u8 digest[MD5_DIGEST_SIZE];
struct crypto_shash *tfm;
int status;
@@ -121,23 +107,16 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
goto out_no_tfm;
}
- cksum.len = crypto_shash_digestsize(tfm);
- cksum.data = kmalloc(cksum.len, GFP_KERNEL);
- if (cksum.data == NULL) {
- status = -ENOMEM;
- goto out;
- }
-
status = crypto_shash_tfm_digest(tfm, clname->data, clname->len,
- cksum.data);
+ digest);
if (status)
goto out;
- md5_to_hex(dname, cksum.data);
+ static_assert(HEXDIR_LEN == 2 * MD5_DIGEST_SIZE + 1);
+ sprintf(dname, "%*phN", MD5_DIGEST_SIZE, digest);
status = 0;
out:
- kfree(cksum.data);
crypto_free_shash(tfm);
out_no_tfm:
return status;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 88c347957da5..81fa7cc6c77b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1222,6 +1222,42 @@ static void put_deleg_file(struct nfs4_file *fp)
nfs4_file_put_access(fp, NFS4_SHARE_ACCESS_READ);
}
+static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct file *f)
+{
+ struct iattr ia = { .ia_valid = ATTR_ATIME | ATTR_CTIME | ATTR_MTIME };
+ struct inode *inode = file_inode(f);
+ int ret;
+
+ /* don't do anything if FMODE_NOCMTIME isn't set */
+ if ((READ_ONCE(f->f_mode) & FMODE_NOCMTIME) == 0)
+ return;
+
+ spin_lock(&f->f_lock);
+ f->f_mode &= ~FMODE_NOCMTIME;
+ spin_unlock(&f->f_lock);
+
+ /* was it never written? */
+ if (!dp->dl_written)
+ return;
+
+ /* did it get a setattr for the timestamps at some point? */
+ if (dp->dl_setattr)
+ return;
+
+ /* Stamp everything to "now" */
+ inode_lock(inode);
+ ret = notify_change(&nop_mnt_idmap, f->f_path.dentry, &ia, NULL);
+ inode_unlock(inode);
+ if (ret) {
+ struct inode *inode = file_inode(f);
+
+ pr_notice_ratelimited("Unable to update timestamps on inode %02x:%02x:%lu: %d\n",
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev),
+ inode->i_ino, ret);
+ }
+}
+
static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
@@ -1229,6 +1265,7 @@ static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
WARN_ON_ONCE(!fp->fi_delegees);
+ nfsd4_finalize_deleg_timestamps(dp, nf->nf_file);
kernel_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
put_deleg_file(fp);
}
@@ -6157,7 +6194,8 @@ nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
path.dentry = file_dentry(nf->nf_file);
rc = vfs_getattr(&path, stat,
- (STATX_MODE | STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
+ STATX_MODE | STATX_SIZE | STATX_ATIME |
+ STATX_MTIME | STATX_CTIME | STATX_CHANGE_COOKIE,
AT_STATX_SYNC_AS_STAT);
nfsd_file_put(nf);
@@ -6264,6 +6302,8 @@ nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
+ struct file *f = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
+
if (!nfsd4_add_rdaccess_to_wrdeleg(rqstp, open, fh, stp) ||
!nfs4_delegation_stat(dp, currentfh, &stat)) {
nfs4_put_stid(&dp->dl_stid);
@@ -6274,10 +6314,17 @@ nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
OPEN_DELEGATE_WRITE;
dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
+ dp->dl_atime = stat.atime;
+ dp->dl_ctime = stat.ctime;
+ dp->dl_mtime = stat.mtime;
+ spin_lock(&f->f_lock);
+ f->f_mode |= FMODE_NOCMTIME;
+ spin_unlock(&f->f_lock);
trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
} else {
- open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG :
- OPEN_DELEGATE_READ;
+ open->op_delegate_type = deleg_ts && nfs4_delegation_stat(dp, currentfh, &stat) ?
+ OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
+ dp->dl_atime = stat.atime;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
}
nfs4_put_stid(&dp->dl_stid);
@@ -9130,25 +9177,25 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
}
/**
- * set_cb_time - vet and set the timespec for a cb_getattr update
- * @cb: timestamp from the CB_GETATTR response
+ * nfsd4_vet_deleg_time - vet and set the timespec for a delegated timestamp update
+ * @req: timestamp from the client
* @orig: original timestamp in the inode
* @now: current time
*
- * Given a timestamp in a CB_GETATTR response, check it against the
+ * Given a timestamp from the client response, check it against the
* current timestamp in the inode and the current time. Returns true
* if the inode's timestamp needs to be updated, and false otherwise.
- * @cb may also be changed if the timestamp needs to be clamped.
+ * @req may also be changed if the timestamp needs to be clamped.
*/
-static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig,
- const struct timespec64 *now)
+bool nfsd4_vet_deleg_time(struct timespec64 *req, const struct timespec64 *orig,
+ const struct timespec64 *now)
{
/*
* "When the time presented is before the original time, then the
* update is ignored." Also no need to update if there is no change.
*/
- if (timespec64_compare(cb, orig) <= 0)
+ if (timespec64_compare(req, orig) <= 0)
return false;
/*
@@ -9156,10 +9203,8 @@ static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig,
* clamp the new time to the current time, or it may
* return NFS4ERR_DELAY to the client, allowing it to retry."
*/
- if (timespec64_compare(cb, now) > 0) {
- /* clamp it */
- *cb = *now;
- }
+ if (timespec64_compare(req, now) > 0)
+ *req = *now;
return true;
}
@@ -9167,28 +9212,27 @@ static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig,
static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp)
{
struct inode *inode = d_inode(dentry);
- struct timespec64 now = current_time(inode);
struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
struct iattr attrs = { };
int ret;
if (deleg_attrs_deleg(dp->dl_type)) {
- struct timespec64 atime = inode_get_atime(inode);
- struct timespec64 mtime = inode_get_mtime(inode);
+ struct timespec64 now = current_time(inode);
attrs.ia_atime = ncf->ncf_cb_atime;
attrs.ia_mtime = ncf->ncf_cb_mtime;
- if (set_cb_time(&attrs.ia_atime, &atime, &now))
+ if (nfsd4_vet_deleg_time(&attrs.ia_atime, &dp->dl_atime, &now))
attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
- if (set_cb_time(&attrs.ia_mtime, &mtime, &now)) {
- attrs.ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET;
+ if (nfsd4_vet_deleg_time(&attrs.ia_mtime, &dp->dl_mtime, &now)) {
+ attrs.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
attrs.ia_ctime = attrs.ia_mtime;
+ if (nfsd4_vet_deleg_time(&attrs.ia_ctime, &dp->dl_ctime, &now))
+ attrs.ia_valid |= ATTR_CTIME | ATTR_CTIME_SET;
}
} else {
attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME;
- attrs.ia_mtime = attrs.ia_ctime = now;
}
if (!attrs.ia_valid)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ea91bad4eee2..c0a3c6a7c8bb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -538,8 +538,9 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
iattr->ia_mtime.tv_sec = modify.seconds;
iattr->ia_mtime.tv_nsec = modify.nseconds;
iattr->ia_ctime.tv_sec = modify.seconds;
- iattr->ia_ctime.tv_nsec = modify.seconds;
- iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET | ATTR_DELEG;
+ iattr->ia_ctime.tv_nsec = modify.nseconds;
+ iattr->ia_valid |= ATTR_CTIME | ATTR_CTIME_SET |
+ ATTR_MTIME | ATTR_MTIME_SET | ATTR_DELEG;
}
/* request sanity: did attrlist4 contain the expected number of words? */
@@ -587,23 +588,13 @@ nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp,
}
#ifdef CONFIG_NFSD_PNFS
-static __be32
-nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp,
- struct nfsd4_deviceid *devid)
-{
- __be32 *p;
-
- p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE);
- if (!p)
- return nfserr_bad_xdr;
- memcpy(devid, p, sizeof(*devid));
- return nfs_ok;
-}
static __be32
nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
struct nfsd4_layoutcommit *lcp)
{
+ u32 len;
+
if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0)
return nfserr_bad_xdr;
if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES)
@@ -611,13 +602,10 @@ nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX)
return nfserr_bad_xdr;
- if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
+ if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
+ return nfserr_bad_xdr;
+ if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, len))
return nfserr_bad_xdr;
- if (lcp->lc_up_len > 0) {
- lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
- if (!lcp->lc_up_layout)
- return nfserr_bad_xdr;
- }
return nfs_ok;
}
@@ -1783,7 +1771,7 @@ nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
__be32 status;
memset(gdev, 0, sizeof(*gdev));
- status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid);
+ status = nfsd4_decode_deviceid4(argp->xdr, &gdev->gd_devid);
if (status)
return status;
if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0)
@@ -1814,7 +1802,7 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
status = nfsd4_decode_stateid4(argp, &lcp->lc_sid);
if (status)
return status;
- if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_newoffset) < 0)
+ if (xdr_stream_decode_bool(argp->xdr, &lcp->lc_newoffset) < 0)
return nfserr_bad_xdr;
if (lcp->lc_newoffset) {
if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_last_wr) < 0)
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index ba9d326b3de6..ab13ee9c7fd8 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -27,7 +27,7 @@
* cache size, the idea being that when the cache is at its maximum number
* of entries, then this should be the average number of entries per bucket.
*/
-#define TARGET_BUCKET_SIZE 64
+#define TARGET_BUCKET_SIZE 8
struct nfsd_drc_bucket {
struct rb_root rb_head;
@@ -237,10 +237,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
}
-/*
- * Move cache entry to end of LRU list, and queue the cleaner to run if it's
- * not already scheduled.
- */
static void
lru_put_end(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp)
{
@@ -272,13 +268,6 @@ nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
/* The bucket LRU is ordered oldest-first. */
list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
- /*
- * Don't free entries attached to calls that are still
- * in-progress, but do keep scanning the list.
- */
- if (rp->c_state == RC_INPROG)
- continue;
-
if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
time_before(expiry, rp->c_timestamp))
break;
@@ -453,8 +442,6 @@ out:
nn->longest_chain_cachesize,
atomic_read(&nn->num_drc_entries));
}
-
- lru_put_end(b, ret);
return ret;
}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 1cd0bed57bc2..ea87b42894dd 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -153,6 +153,15 @@ static inline void nfsd_debugfs_exit(void) {}
extern bool nfsd_disable_splice_read __read_mostly;
+enum {
+ /* Any new NFSD_IO enum value must be added at the end */
+ NFSD_IO_BUFFERED,
+ NFSD_IO_DONTCACHE,
+};
+
+extern u64 nfsd_io_cache_read __read_mostly;
+extern u64 nfsd_io_cache_write __read_mostly;
+
extern int nfsd_max_blksize;
static inline int nfsd_v4client(struct svc_rqst *rq)
@@ -335,14 +344,8 @@ void nfsd_lockd_shutdown(void);
* cannot conflict with any existing be32 nfserr value.
*/
enum {
- NFSERR_DROPIT = NFS4ERR_FIRST_FREE,
-/* if a request fails due to kmalloc failure, it gets dropped.
- * Client should resend eventually
- */
-#define nfserr_dropit cpu_to_be32(NFSERR_DROPIT)
-
/* end-of-file indicator in readdir */
- NFSERR_EOF,
+ NFSERR_EOF = NFS4ERR_FIRST_FREE,
#define nfserr_eof cpu_to_be32(NFSERR_EOF)
/* replay detected */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 74cf1f4de174..3eb724ec9566 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -364,10 +364,30 @@ __fh_verify(struct svc_rqst *rqstp,
if (error)
goto out;
+ /*
+ * If rqstp is NULL, this is a LOCALIO request which will only
+ * ever use a filehandle/credential pair for which access has
+ * been affirmed (by ACCESS or OPEN NFS requests) over the
+ * wire. Skip both the xprtsec policy and the security flavor
+ * checks.
+ */
+ if (!rqstp)
+ goto check_permissions;
+
if ((access & NFSD_MAY_NLM) && (exp->ex_flags & NFSEXP_NOAUTHNLM))
/* NLM is allowed to fully bypass authentication */
goto out;
+ /*
+ * NLM is allowed to bypass the xprtsec policy check because lockd
+ * doesn't support xprtsec.
+ */
+ if (!(access & NFSD_MAY_NLM)) {
+ error = check_xprtsec_policy(exp, rqstp);
+ if (error)
+ goto out;
+ }
+
if (access & NFSD_MAY_BYPASS_GSS)
may_bypass_gss = true;
/*
@@ -379,13 +399,13 @@ __fh_verify(struct svc_rqst *rqstp,
&& exp->ex_path.dentry == dentry)
may_bypass_gss = true;
- error = check_nfsd_access(exp, rqstp, may_bypass_gss);
+ error = check_security_flavor(exp, rqstp, may_bypass_gss);
if (error)
goto out;
- /* During LOCALIO call to fh_verify will be called with a NULL rqstp */
- if (rqstp)
- svc_xprt_set_valid(rqstp->rq_xprt);
+ svc_xprt_set_valid(rqstp->rq_xprt);
+
+check_permissions:
/* Finally, check access permissions. */
error = nfsd_permission(cred, exp, dentry, access);
out:
@@ -663,6 +683,33 @@ out_negative:
}
/**
+ * fh_getattr - Retrieve attributes on a local file
+ * @fhp: File handle of target file
+ * @stat: Caller-supplied kstat buffer to be filled in
+ *
+ * Returns nfs_ok on success, otherwise an NFS status code is
+ * returned.
+ */
+__be32 fh_getattr(const struct svc_fh *fhp, struct kstat *stat)
+{
+ struct path p = {
+ .mnt = fhp->fh_export->ex_path.mnt,
+ .dentry = fhp->fh_dentry,
+ };
+ struct inode *inode = d_inode(p.dentry);
+ u32 request_mask = STATX_BASIC_STATS;
+
+ if (S_ISREG(inode->i_mode))
+ request_mask |= (STATX_DIOALIGN | STATX_DIO_READ_ALIGN);
+
+ if (fhp->fh_maxsize == NFS4_FHSIZE)
+ request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
+
+ return nfserrno(vfs_getattr(&p, stat, request_mask,
+ AT_STATX_SYNC_AS_STAT));
+}
+
+/**
* fh_fill_pre_attrs - Fill in pre-op attributes
* @fhp: file handle to be updated
*
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 1cf979722521..5ef7191f8ad8 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -14,6 +14,8 @@
#include <linux/exportfs.h>
#include <linux/nfs4.h>
+#include "export.h"
+
/*
* The file handle starts with a sequence of four-byte words.
* The first word contains a version number (1) and three descriptor bytes
@@ -220,6 +222,7 @@ extern char * SVCFH_fmt(struct svc_fh *fhp);
__be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int);
__be32 fh_verify_local(struct net *, struct svc_cred *, struct auth_domain *,
struct svc_fh *, umode_t, int);
+__be32 fh_getattr(const struct svc_fh *fhp, struct kstat *stat);
__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
__be32 fh_update(struct svc_fh *);
void fh_put(struct svc_fh *);
@@ -272,6 +275,41 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
}
/**
+ * fh_want_write - Get write access to an export
+ * @fhp: File handle of file to be written
+ *
+ * Caller must invoke fh_drop_write() when its write operation
+ * is complete.
+ *
+ * Returns 0 if the file handle's export can be written to. Otherwise
+ * the export is not prepared for updates, and the returned negative
+ * errno value reflects the reason for the failure.
+ */
+static inline int fh_want_write(struct svc_fh *fhp)
+{
+ int ret;
+
+ if (fhp->fh_want_write)
+ return 0;
+ ret = mnt_want_write(fhp->fh_export->ex_path.mnt);
+ if (!ret)
+ fhp->fh_want_write = true;
+ return ret;
+}
+
+/**
+ * fh_drop_write - Release write access on an export
+ * @fhp: File handle of file on which fh_want_write() was previously called
+ */
+static inline void fh_drop_write(struct svc_fh *fhp)
+{
+ if (fhp->fh_want_write) {
+ fhp->fh_want_write = false;
+ mnt_drop_write(fhp->fh_export->ex_path.mnt);
+ }
+}
+
+/**
* knfsd_fh_hash - calculate the crc32 hash for the filehandle
* @fh - pointer to filehandle
*
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index 925817f66917..db9af780438b 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -29,12 +29,13 @@ struct nfsd4_layout_ops {
__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
const struct nfsd4_getdeviceinfo *gdevp);
- __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
- struct nfsd4_layoutget *lgp);
+ __be32 (*proc_layoutget)(struct svc_rqst *rqstp, struct inode *inode,
+ const struct svc_fh *fhp, struct nfsd4_layoutget *lgp);
__be32 (*encode_layoutget)(struct xdr_stream *xdr,
const struct nfsd4_layoutget *lgp);
__be32 (*proc_layoutcommit)(struct inode *inode,
+ struct svc_rqst *rqstp,
struct nfsd4_layoutcommit *lcp);
void (*fence_client)(struct nfs4_layout_stateid *ls,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 8adc2550129e..1e736f402426 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,6 +35,7 @@
#ifndef _NFSD4_STATE_H
#define _NFSD4_STATE_H
+#include <crypto/md5.h>
#include <linux/idr.h>
#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
@@ -217,13 +218,20 @@ struct nfs4_delegation {
struct nfs4_clnt_odstate *dl_clnt_odstate;
time64_t dl_time;
u32 dl_type;
-/* For recall: */
+ /* For recall: */
int dl_retries;
struct nfsd4_callback dl_recall;
bool dl_recalled;
+ bool dl_written;
+ bool dl_setattr;
/* for CB_GETATTR */
struct nfs4_cb_fattr dl_cb_fattr;
+
+ /* For delegated timestamps */
+ struct timespec64 dl_atime;
+ struct timespec64 dl_mtime;
+ struct timespec64 dl_ctime;
};
static inline bool deleg_is_read(u32 dl_type)
@@ -242,6 +250,9 @@ static inline bool deleg_attrs_deleg(u32 dl_type)
dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG;
}
+bool nfsd4_vet_deleg_time(struct timespec64 *cb, const struct timespec64 *orig,
+ const struct timespec64 *now);
+
#define cb_to_delegation(cb) \
container_of(cb, struct nfs4_delegation, dl_recall)
@@ -381,7 +392,8 @@ struct nfsd4_sessionid {
u32 reserved;
};
-#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
+/* Length of MD5 digest as hex, plus terminating '\0' */
+#define HEXDIR_LEN (2 * MD5_DIGEST_SIZE + 1)
/*
* State Meaning Where set
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index aa4a95713a48..9cb20d4aeab1 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -49,6 +49,8 @@
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
bool nfsd_disable_splice_read __read_mostly;
+u64 nfsd_io_cache_read __read_mostly = NFSD_IO_BUFFERED;
+u64 nfsd_io_cache_write __read_mostly = NFSD_IO_BUFFERED;
/**
* nfserrno - Map Linux errnos to NFS errnos
@@ -467,7 +469,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
return 0;
}
- if (!iap->ia_valid)
+ if ((iap->ia_valid & ~ATTR_DELEG) == 0)
return 0;
/*
@@ -1099,6 +1101,16 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
size_t len;
init_sync_kiocb(&kiocb, file);
+
+ switch (nfsd_io_cache_read) {
+ case NFSD_IO_BUFFERED:
+ break;
+ case NFSD_IO_DONTCACHE:
+ if (file->f_op->fop_flags & FOP_DONTCACHE)
+ kiocb.ki_flags = IOCB_DONTCACHE;
+ break;
+ }
+
kiocb.ki_pos = offset;
v = 0;
@@ -1224,6 +1236,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
since = READ_ONCE(file->f_wb_err);
if (verf)
nfsd_copy_write_verifier(verf, nn);
+
+ switch (nfsd_io_cache_write) {
+ case NFSD_IO_BUFFERED:
+ break;
+ case NFSD_IO_DONTCACHE:
+ if (file->f_op->fop_flags & FOP_DONTCACHE)
+ kiocb.ki_flags |= IOCB_DONTCACHE;
+ break;
+ }
host_err = vfs_iocb_iter_write(file, &kiocb, &iter);
if (host_err < 0) {
commit_reset_write_verifier(nn, rqstp, host_err);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fde3e0c11dba..0c0292611c6d 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -160,41 +160,4 @@ __be32 nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
void nfsd_filp_close(struct file *fp);
-static inline int fh_want_write(struct svc_fh *fh)
-{
- int ret;
-
- if (fh->fh_want_write)
- return 0;
- ret = mnt_want_write(fh->fh_export->ex_path.mnt);
- if (!ret)
- fh->fh_want_write = true;
- return ret;
-}
-
-static inline void fh_drop_write(struct svc_fh *fh)
-{
- if (fh->fh_want_write) {
- fh->fh_want_write = false;
- mnt_drop_write(fh->fh_export->ex_path.mnt);
- }
-}
-
-static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
-{
- u32 request_mask = STATX_BASIC_STATS;
- struct path p = {.mnt = fh->fh_export->ex_path.mnt,
- .dentry = fh->fh_dentry};
- struct inode *inode = d_inode(p.dentry);
-
- if (S_ISREG(inode->i_mode))
- request_mask |= (STATX_DIOALIGN | STATX_DIO_READ_ALIGN);
-
- if (fh->fh_maxsize == NFS4_FHSIZE)
- request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
-
- return nfserrno(vfs_getattr(&p, stat, request_mask,
- AT_STATX_SYNC_AS_STAT));
-}
-
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index a23bc56051ca..d4b48602b2b0 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -595,9 +595,43 @@ struct nfsd4_reclaim_complete {
struct nfsd4_deviceid {
u64 fsid_idx;
u32 generation;
- u32 pad;
};
+static inline __be32 *
+svcxdr_encode_deviceid4(__be32 *p, const struct nfsd4_deviceid *devid)
+{
+ __be64 *q = (__be64 *)p;
+
+ *q = (__force __be64)devid->fsid_idx;
+ p += 2;
+ *p++ = (__force __be32)devid->generation;
+ *p++ = xdr_zero;
+ return p;
+}
+
+static inline __be32 *
+svcxdr_decode_deviceid4(__be32 *p, struct nfsd4_deviceid *devid)
+{
+ __be64 *q = (__be64 *)p;
+
+ devid->fsid_idx = (__force u64)(*q);
+ p += 2;
+ devid->generation = (__force u32)(*p++);
+ p++; /* NFSD does not use the remaining octets */
+ return p;
+}
+
+static inline __be32
+nfsd4_decode_deviceid4(struct xdr_stream *xdr, struct nfsd4_deviceid *devid)
+{
+ __be32 *p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE);
+
+ if (unlikely(!p))
+ return nfserr_bad_xdr;
+ svcxdr_decode_deviceid4(p, devid);
+ return nfs_ok;
+}
+
struct nfsd4_layout_seg {
u32 iomode;
u64 offset;
@@ -630,8 +664,7 @@ struct nfsd4_layoutcommit {
u64 lc_last_wr; /* request */
struct timespec64 lc_mtime; /* request */
u32 lc_layout_type; /* request */
- u32 lc_up_len; /* layout length */
- void *lc_up_layout; /* decoded by callback */
+ struct xdr_buf lc_up_layout; /* decoded by callback */
bool lc_size_chg; /* response */
u64 lc_newsize; /* response */
};
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 540004970ad5..c895146c1444 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -236,6 +236,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define ATTR_ATIME_SET (1 << 7)
#define ATTR_MTIME_SET (1 << 8)
#define ATTR_FORCE (1 << 9) /* Not a change, but a change it */
+#define ATTR_CTIME_SET (1 << 10)
#define ATTR_KILL_SUID (1 << 11)
#define ATTR_KILL_SGID (1 << 12)
#define ATTR_FILE (1 << 13)
diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index 7ca2715edccc..3d91043254e6 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -63,7 +63,6 @@ struct nfsd_localio_operations {
struct nfsd_file __rcu **pnf,
const fmode_t);
struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **);
- struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *);
struct file *(*nfsd_file_file)(struct nfsd_file *);
void (*nfsd_file_dio_alignment)(struct nfsd_file *,
u32 *, u32 *, u32 *);
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index fde60d4e2cd5..da2a2531e110 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -104,6 +104,9 @@ enum {
* it has access to. It is NOT counted
* in ->sv_tmpcnt.
*/
+ XPT_RPCB_UNREG, /* transport that needs unregistering
+ * with rpcbind (TCP, UDP) on destroy
+ */
};
/*
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 49278749ad0c..152597750f55 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -721,7 +721,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr)
* @len: size of buffer pointed to by @ptr
*
* Return values:
- * On success, returns size of object stored in @ptr
+ * %0 on success
* %-EBADMSG on XDR buffer overflow
*/
static inline ssize_t
@@ -732,7 +732,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len)
if (unlikely(!p))
return -EBADMSG;
xdr_decode_opaque_fixed(p, ptr, len);
- return len;
+ return 0;
}
/**
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index a570e7adf270..984e0cf9bf8a 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -18,9 +18,10 @@ config SUNRPC_SWAP
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
- depends on SUNRPC && CRYPTO
+ depends on SUNRPC
default y
select SUNRPC_GSS
+ select CRYPTO
select CRYPTO_SKCIPHER
select CRYPTO_HASH
help
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index e82212f6b562..a8ec30759a18 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -724,7 +724,7 @@ svcauth_gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
- if (flavor != RPC_AUTH_GSS) {
+ if (flavor != RPC_AUTH_GSS || checksum.len < XDR_UNIT) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index de05ef637bdc..4704dce7284e 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1425,8 +1425,6 @@ svc_process_common(struct svc_rqst *rqstp)
/* Call the function that processes the request. */
rc = process.dispatch(rqstp);
- if (procp->pc_release)
- procp->pc_release(rqstp);
xdr_finish_decode(xdr);
if (!rc)
@@ -1525,6 +1523,14 @@ static void svc_drop(struct svc_rqst *rqstp)
trace_svc_drop(rqstp);
}
+static void svc_release_rqst(struct svc_rqst *rqstp)
+{
+ const struct svc_procedure *procp = rqstp->rq_procinfo;
+
+ if (procp && procp->pc_release)
+ procp->pc_release(rqstp);
+}
+
/**
* svc_process - Execute one RPC transaction
* @rqstp: RPC transaction context
@@ -1564,9 +1570,12 @@ void svc_process(struct svc_rqst *rqstp)
if (unlikely(*p != rpc_call))
goto out_baddir;
- if (!svc_process_common(rqstp))
+ if (!svc_process_common(rqstp)) {
+ svc_release_rqst(rqstp);
goto out_drop;
+ }
svc_send(rqstp);
+ svc_release_rqst(rqstp);
return;
out_baddir:
@@ -1634,6 +1643,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
if (!proc_error) {
/* Processing error: drop the request */
xprt_free_bc_request(req);
+ svc_release_rqst(rqstp);
return;
}
/* Finally, send the reply synchronously */
@@ -1647,6 +1657,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
timeout.to_maxval = timeout.to_initval;
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req, &timeout);
+ svc_release_rqst(rqstp);
if (IS_ERR(task))
return;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 049ab53088e9..6973184ff667 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1014,6 +1014,19 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
struct svc_serv *serv = xprt->xpt_server;
struct svc_deferred_req *dr;
+ /* unregister with rpcbind for when transport type is TCP or UDP.
+ */
+ if (test_bit(XPT_RPCB_UNREG, &xprt->xpt_flags)) {
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock,
+ sk_xprt);
+ struct socket *sock = svsk->sk_sock;
+
+ if (svc_register(serv, xprt->xpt_net, sock->sk->sk_family,
+ sock->sk->sk_protocol, 0) < 0)
+ pr_warn("failed to unregister %s with rpcbind\n",
+ xprt->xpt_class->xcl_name);
+ }
+
if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
return;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e2c5e0e626f9..7b90abc5cf0e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -836,6 +836,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
/* data might have come in before data_ready set up */
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
+ set_bit(XPT_RPCB_UNREG, &svsk->sk_xprt.xpt_flags);
/* make sure we get destination address info */
switch (svsk->sk_sk->sk_family) {
@@ -1224,7 +1225,7 @@ err_noclose:
* that the pages backing @xdr are unchanging.
*/
static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
- rpc_fraghdr marker, int *sentp)
+ rpc_fraghdr marker)
{
struct msghdr msg = {
.msg_flags = MSG_SPLICE_PAGES,
@@ -1233,8 +1234,6 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
void *buf;
int ret;
- *sentp = 0;
-
/* The stream record marker is copied into a temporary page
* fragment buffer so that it can be included in rq_bvec.
*/
@@ -1252,10 +1251,7 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
1 + count, sizeof(marker) + rqstp->rq_res.len);
ret = sock_sendmsg(svsk->sk_sock, &msg);
page_frag_free(buf);
- if (ret < 0)
- return ret;
- *sentp += ret;
- return 0;
+ return ret;
}
/**
@@ -1274,7 +1270,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
struct xdr_buf *xdr = &rqstp->rq_res;
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xdr->len);
- int sent, err;
+ int sent;
svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
rqstp->rq_xprt_ctxt = NULL;
@@ -1282,9 +1278,9 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = svc_tcp_sendmsg(svsk, rqstp, marker, &sent);
- trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
- if (err < 0 || sent != (xdr->len + sizeof(marker)))
+ sent = svc_tcp_sendmsg(svsk, rqstp, marker);
+ trace_svcsock_tcp_send(xprt, sent);
+ if (sent < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
mutex_unlock(&xprt->xpt_mutex);
return sent;
@@ -1293,10 +1289,10 @@ out_notconn:
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
out_close:
- pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
+ pr_notice("rpc-srv/tcp: %s: %s %d when sending %zu bytes - shutting down socket\n",
xprt->xpt_server->sv_name,
- (err < 0) ? "got error" : "sent",
- (err < 0) ? err : sent, xdr->len);
+ (sent < 0) ? "got error" : "sent",
+ sent, xdr->len + sizeof(marker));
svc_xprt_deferred_close(xprt);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
@@ -1355,6 +1351,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
if (sk->sk_state == TCP_LISTEN) {
strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
+ set_bit(XPT_RPCB_UNREG, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
} else {
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 09434e1143c5..8b01b7ae2690 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -389,7 +389,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
saddr = (struct sockaddr *)&xprt->addr;
port = rpc_get_port(saddr);
- /* buf_len is the len until the first occurence of either
+ /* buf_len is the len until the first occurrence of either
* '\n' or '\0'
*/
buf_len = strcspn(buf, "\n");
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
index 8b4ff08c49e5..bdc7bd24ffb1 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
@@ -13,5 +13,5 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
{% if annotate %}
/* (fixed-length opaque) */
{% endif %}
- return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) >= 0;
+ return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) == 0;
};