summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c2
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/9p/vfs_inode.c4
-rw-r--r--fs/9p/vfs_inode_dotl.c14
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/afs/Kconfig1
-rw-r--r--fs/afs/cmservice.c5
-rw-r--r--fs/afs/dir.c236
-rw-r--r--fs/afs/dir_silly.c3
-rw-r--r--fs/afs/file.c483
-rw-r--r--fs/afs/fs_operation.c10
-rw-r--r--fs/afs/fsclient.c112
-rw-r--r--fs/afs/inode.c19
-rw-r--r--fs/afs/internal.h61
-rw-r--r--fs/afs/rxrpc.c150
-rw-r--r--fs/afs/vlclient.c1
-rw-r--r--fs/afs/write.c658
-rw-r--r--fs/afs/yfsclient.c82
-rw-r--r--fs/aio.c5
-rw-r--r--fs/autofs/autofs_i.h1
-rw-r--r--fs/autofs/expire.c2
-rw-r--r--fs/autofs/waitq.c72
-rw-r--r--fs/befs/TODO14
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/binfmt_elf_fdpic.c3
-rw-r--r--fs/binfmt_flat.c18
-rw-r--r--fs/block_dev.c59
-rw-r--r--fs/btrfs/compression.c5
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/extent_io.c22
-rw-r--r--fs/btrfs/inode.c37
-rw-r--r--fs/btrfs/ioctl.c228
-rw-r--r--fs/btrfs/raid56.c3
-rw-r--r--fs/btrfs/reflink.c6
-rw-r--r--fs/btrfs/tree-log.c3
-rw-r--r--fs/btrfs/volumes.c3
-rw-r--r--fs/btrfs/zlib.c5
-rw-r--r--fs/btrfs/zstd.c5
-rw-r--r--fs/buffer.c42
-rw-r--r--fs/cachefiles/Makefile1
-rw-r--r--fs/cachefiles/interface.c5
-rw-r--r--fs/cachefiles/internal.h9
-rw-r--r--fs/cachefiles/io.c420
-rw-r--r--fs/ceph/Kconfig1
-rw-r--r--fs/ceph/addr.c626
-rw-r--r--fs/ceph/cache.c125
-rw-r--r--fs/ceph/cache.h101
-rw-r--r--fs/ceph/caps.c35
-rw-r--r--fs/ceph/debugfs.c12
-rw-r--r--fs/ceph/dir.c32
-rw-r--r--fs/ceph/export.c21
-rw-r--r--fs/ceph/file.c52
-rw-r--r--fs/ceph/inode.c77
-rw-r--r--fs/ceph/io.c2
-rw-r--r--fs/ceph/mds_client.c20
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/metric.c62
-rw-r--r--fs/ceph/metric.h56
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.h32
-rw-r--r--fs/ceph/xattr.c7
-rw-r--r--fs/cifs/cifs_debug.c58
-rw-r--r--fs/cifs/cifs_dfs_ref.c14
-rw-r--r--fs/cifs/cifs_fs_sb.h5
-rw-r--r--fs/cifs/cifs_ioctl.h48
-rw-r--r--fs/cifs/cifs_swn.h27
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsfs.c60
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h47
-rw-r--r--fs/cifs/cifspdu.h5
-rw-r--r--fs/cifs/cifsproto.h45
-rw-r--r--fs/cifs/cifssmb.c52
-rw-r--r--fs/cifs/connect.c71
-rw-r--r--fs/cifs/dfs_cache.c41
-rw-r--r--fs/cifs/dir.c179
-rw-r--r--fs/cifs/file.c187
-rw-r--r--fs/cifs/fs_context.c153
-rw-r--r--fs/cifs/fs_context.h13
-rw-r--r--fs/cifs/inode.c224
-rw-r--r--fs/cifs/ioctl.c192
-rw-r--r--fs/cifs/link.c57
-rw-r--r--fs/cifs/misc.c96
-rw-r--r--fs/cifs/readdir.c19
-rw-r--r--fs/cifs/sess.c6
-rw-r--r--fs/cifs/smb1ops.c6
-rw-r--r--fs/cifs/smb2inode.c10
-rw-r--r--fs/cifs/smb2misc.c1
-rw-r--r--fs/cifs/smb2ops.c199
-rw-r--r--fs/cifs/smb2pdu.c20
-rw-r--r--fs/cifs/smb2pdu.h49
-rw-r--r--fs/cifs/smb2proto.h16
-rw-r--r--fs/cifs/trace.h29
-rw-r--r--fs/cifs/unc.c4
-rw-r--r--fs/cifs/xattr.c44
-rw-r--r--fs/coda/file.c6
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c4
-rw-r--r--fs/configfs/file.c4
-rw-r--r--fs/configfs/inode.c4
-rw-r--r--fs/configfs/item.c4
-rw-r--r--fs/configfs/mount.c4
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/coredump.c72
-rw-r--r--fs/crypto/Kconfig30
-rw-r--r--fs/d_path.c10
-rw-r--r--fs/dax.c43
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/debugfs/file.c94
-rw-r--r--fs/debugfs/inode.c11
-rw-r--r--fs/dlm/config.c86
-rw-r--r--fs/dlm/config.h1
-rw-r--r--fs/dlm/debug_fs.c1
-rw-r--r--fs/dlm/lock.c2
-rw-r--r--fs/dlm/lockspace.c20
-rw-r--r--fs/dlm/lowcomms.c194
-rw-r--r--fs/dlm/lowcomms.h5
-rw-r--r--fs/dlm/midcomms.c33
-rw-r--r--fs/dlm/rcom.c2
-rw-r--r--fs/ecryptfs/crypto.c29
-rw-r--r--fs/ecryptfs/debug.c4
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h19
-rw-r--r--fs/ecryptfs/file.c4
-rw-r--r--fs/ecryptfs/inode.c196
-rw-r--r--fs/ecryptfs/keystore.c15
-rw-r--r--fs/ecryptfs/kthread.c3
-rw-r--r--fs/ecryptfs/main.c30
-rw-r--r--fs/ecryptfs/messaging.c14
-rw-r--r--fs/ecryptfs/miscdev.c3
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/ecryptfs/read_write.c4
-rw-r--r--fs/ecryptfs/super.c8
-rw-r--r--fs/efivarfs/file.c77
-rw-r--r--fs/efivarfs/inode.c44
-rw-r--r--fs/erofs/Kconfig14
-rw-r--r--fs/erofs/Makefile2
-rw-r--r--fs/erofs/data.c19
-rw-r--r--fs/erofs/decompressor.c272
-rw-r--r--fs/erofs/erofs_fs.h54
-rw-r--r--fs/erofs/inode.c7
-rw-r--r--fs/erofs/internal.h86
-rw-r--r--fs/erofs/pcpubuf.c148
-rw-r--r--fs/erofs/super.c148
-rw-r--r--fs/erofs/utils.c12
-rw-r--r--fs/erofs/zdata.c254
-rw-r--r--fs/erofs/zdata.h14
-rw-r--r--fs/erofs/zmap.c181
-rw-r--r--fs/eventpoll.c58
-rw-r--r--fs/exfat/balloc.c95
-rw-r--r--fs/exfat/dir.c26
-rw-r--r--fs/exfat/exfat_fs.h11
-rw-r--r--fs/exfat/fatent.c41
-rw-r--r--fs/exfat/file.c53
-rw-r--r--fs/exfat/inode.c3
-rw-r--r--fs/exfat/namei.c11
-rw-r--r--fs/exfat/super.c1
-rw-r--r--fs/ext2/dir.c94
-rw-r--r--fs/ext2/ext2.h19
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/ioctl.c88
-rw-r--r--fs/ext2/namei.c39
-rw-r--r--fs/ext2/super.c7
-rw-r--r--fs/ext4/balloc.c2
-rw-r--r--fs/ext4/dir.c41
-rw-r--r--fs/ext4/ext4.h119
-rw-r--r--fs/ext4/fast_commit.c8
-rw-r--r--fs/ext4/file.c27
-rw-r--r--fs/ext4/fsmap.c4
-rw-r--r--fs/ext4/hash.c25
-rw-r--r--fs/ext4/ialloc.c53
-rw-r--r--fs/ext4/indirect.c2
-rw-r--r--fs/ext4/inline.c27
-rw-r--r--fs/ext4/inode.c8
-rw-r--r--fs/ext4/ioctl.c214
-rw-r--r--fs/ext4/mballoc.c592
-rw-r--r--fs/ext4/mballoc.h24
-rw-r--r--fs/ext4/migrate.c6
-rw-r--r--fs/ext4/mmp.c2
-rw-r--r--fs/ext4/namei.c250
-rw-r--r--fs/ext4/super.c121
-rw-r--r--fs/ext4/sysfs.c8
-rw-r--r--fs/ext4/verity.c12
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/f2fs/Kconfig16
-rw-r--r--fs/f2fs/acl.c1
-rw-r--r--fs/f2fs/checkpoint.c9
-rw-r--r--fs/f2fs/compress.c70
-rw-r--r--fs/f2fs/compress.h0
-rw-r--r--fs/f2fs/data.c146
-rw-r--r--fs/f2fs/debug.c3
-rw-r--r--fs/f2fs/dir.c5
-rw-r--r--fs/f2fs/f2fs.h60
-rw-r--r--fs/f2fs/file.c272
-rw-r--r--fs/f2fs/gc.c95
-rw-r--r--fs/f2fs/gc.h6
-rw-r--r--fs/f2fs/inline.c3
-rw-r--r--fs/f2fs/inode.c3
-rw-r--r--fs/f2fs/namei.c8
-rw-r--r--fs/f2fs/node.c19
-rw-r--r--fs/f2fs/node.h1
-rw-r--r--fs/f2fs/recovery.c3
-rw-r--r--fs/f2fs/segment.c184
-rw-r--r--fs/f2fs/segment.h16
-rw-r--r--fs/f2fs/super.c102
-rw-r--r--fs/f2fs/sysfs.c47
-rw-r--r--fs/f2fs/verity.c77
-rw-r--r--fs/f2fs/xattr.c1
-rw-r--r--fs/fat/fatent.c2
-rw-r--r--fs/file.c39
-rw-r--r--fs/fs_parser.c2
-rw-r--r--fs/fscache/Kconfig1
-rw-r--r--fs/fscache/Makefile1
-rw-r--r--fs/fscache/internal.h4
-rw-r--r--fs/fscache/io.c116
-rw-r--r--fs/fscache/page.c2
-rw-r--r--fs/fscache/stats.c1
-rw-r--r--fs/fuse/Makefile2
-rw-r--r--fs/fuse/acl.c7
-rw-r--r--fs/fuse/cuse.c12
-rw-r--r--fs/fuse/dev.c7
-rw-r--r--fs/fuse/dir.c12
-rw-r--r--fs/fuse/file.c506
-rw-r--r--fs/fuse/fuse_i.h53
-rw-r--r--fs/fuse/inode.c12
-rw-r--r--fs/fuse/ioctl.c490
-rw-r--r--fs/fuse/readdir.c2
-rw-r--r--fs/fuse/virtio_fs.c28
-rw-r--r--fs/fuse/xattr.c9
-rw-r--r--fs/gfs2/aops.c5
-rw-r--r--fs/gfs2/bmap.c153
-rw-r--r--fs/gfs2/bmap.h13
-rw-r--r--fs/gfs2/dir.c52
-rw-r--r--fs/gfs2/file.c80
-rw-r--r--fs/gfs2/glock.c46
-rw-r--r--fs/gfs2/glops.c38
-rw-r--r--fs/gfs2/incore.h3
-rw-r--r--fs/gfs2/inode.c36
-rw-r--r--fs/gfs2/inode.h4
-rw-r--r--fs/gfs2/lock_dlm.c37
-rw-r--r--fs/gfs2/log.c31
-rw-r--r--fs/gfs2/log.h1
-rw-r--r--fs/gfs2/lops.c23
-rw-r--r--fs/gfs2/lops.h1
-rw-r--r--fs/gfs2/meta_io.c20
-rw-r--r--fs/gfs2/meta_io.h6
-rw-r--r--fs/gfs2/ops_fstype.c8
-rw-r--r--fs/gfs2/quota.c6
-rw-r--r--fs/gfs2/recovery.c8
-rw-r--r--fs/gfs2/rgrp.c8
-rw-r--r--fs/gfs2/super.c12
-rw-r--r--fs/gfs2/sys.c67
-rw-r--r--fs/gfs2/util.c20
-rw-r--r--fs/gfs2/xattr.c29
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/extents.c7
-rw-r--r--fs/hfsplus/hfsplus_fs.h14
-rw-r--r--fs/hfsplus/inode.c54
-rw-r--r--fs/hfsplus/ioctl.c84
-rw-r--r--fs/hostfs/hostfs_kern.c3
-rw-r--r--fs/hpfs/hpfs.h3
-rw-r--r--fs/hugetlbfs/inode.c16
-rw-r--r--fs/inode.c100
-rw-r--r--fs/io-wq.c351
-rw-r--r--fs/io-wq.h3
-rw-r--r--fs/io_uring.c2681
-rw-r--r--fs/ioctl.c325
-rw-r--r--fs/iomap/buffered-io.c14
-rw-r--r--fs/iomap/direct-io.c24
-rw-r--r--fs/iomap/swapfile.c38
-rw-r--r--fs/isofs/rock.c1
-rw-r--r--fs/jbd2/recovery.c5
-rw-r--r--fs/jbd2/transaction.c15
-rw-r--r--fs/jffs2/TODO37
-rw-r--r--fs/jffs2/file.c1
-rw-r--r--fs/jffs2/scan.c2
-rw-r--r--fs/jffs2/summary.h16
-rw-r--r--fs/jfs/file.c6
-rw-r--r--fs/jfs/ioctl.c111
-rw-r--r--fs/jfs/jfs_dinode.h7
-rw-r--r--fs/jfs/jfs_inode.h4
-rw-r--r--fs/jfs/namei.c6
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/locks.c69
-rw-r--r--fs/namei.c25
-rw-r--r--fs/namespace.c20
-rw-r--r--fs/netfs/Kconfig23
-rw-r--r--fs/netfs/Makefile5
-rw-r--r--fs/netfs/internal.h97
-rw-r--r--fs/netfs/read_helper.c1185
-rw-r--r--fs/netfs/stats.c59
-rw-r--r--fs/nfs/callback_proc.c17
-rw-r--r--fs/nfs/client.c20
-rw-r--r--fs/nfs/delegation.c29
-rw-r--r--fs/nfs/delegation.h3
-rw-r--r--fs/nfs/dir.c36
-rw-r--r--fs/nfs/export.c15
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/filelayout/filelayout.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c2
-rw-r--r--fs/nfs/fs_context.c69
-rw-r--r--fs/nfs/inode.c424
-rw-r--r--fs/nfs/internal.h3
-rw-r--r--fs/nfs/io.c2
-rw-r--r--fs/nfs/mount_clnt.c14
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs3acl.c2
-rw-r--r--fs/nfs/nfs3xdr.c5
-rw-r--r--fs/nfs/nfs42proc.c77
-rw-r--r--fs/nfs/nfs42xattr.c2
-rw-r--r--fs/nfs/nfs4file.c6
-rw-r--r--fs/nfs/nfs4proc.c268
-rw-r--r--fs/nfs/nfs4renewd.c6
-rw-r--r--fs/nfs/nfs4state.c8
-rw-r--r--fs/nfs/nfs4trace.h47
-rw-r--r--fs/nfs/nfs4xdr.c66
-rw-r--r--fs/nfs/nfstrace.c1
-rw-r--r--fs/nfs/nfstrace.h22
-rw-r--r--fs/nfs/pagelist.c24
-rw-r--r--fs/nfs/pnfs.c26
-rw-r--r--fs/nfs/proc.c1
-rw-r--r--fs/nfs/super.c14
-rw-r--r--fs/nfs/write.c7
-rw-r--r--fs/nfs_common/nfsacl.c71
-rw-r--r--fs/nfsd/Kconfig8
-rw-r--r--fs/nfsd/netns.h6
-rw-r--r--fs/nfsd/nfs2acl.c87
-rw-r--r--fs/nfsd/nfs3acl.c39
-rw-r--r--fs/nfsd/nfs3proc.c97
-rw-r--r--fs/nfsd/nfs3xdr.c1043
-rw-r--r--fs/nfsd/nfs4proc.c46
-rw-r--r--fs/nfsd/nfs4recover.c8
-rw-r--r--fs/nfsd/nfs4state.c497
-rw-r--r--fs/nfsd/nfs4xdr.c116
-rw-r--r--fs/nfsd/nfsctl.c29
-rw-r--r--fs/nfsd/nfsd.h7
-rw-r--r--fs/nfsd/nfsfh.c2
-rw-r--r--fs/nfsd/nfsfh.h2
-rw-r--r--fs/nfsd/nfsproc.c55
-rw-r--r--fs/nfsd/nfssvc.c50
-rw-r--r--fs/nfsd/nfsxdr.c413
-rw-r--r--fs/nfsd/state.h7
-rw-r--r--fs/nfsd/trace.h24
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/nfsd/vfs.h2
-rw-r--r--fs/nfsd/xdr.h23
-rw-r--r--fs/nfsd/xdr3.h37
-rw-r--r--fs/nfsd/xdr4.h8
-rw-r--r--fs/nilfs2/cpfile.c2
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/ioctl.c65
-rw-r--r--fs/nilfs2/namei.c5
-rw-r--r--fs/nilfs2/nilfs.h3
-rw-r--r--fs/nilfs2/segment.c4
-rw-r--r--fs/nilfs2/the_nilfs.c2
-rw-r--r--fs/notify/fanotify/fanotify.c166
-rw-r--r--fs/notify/fanotify/fanotify.h46
-rw-r--r--fs/notify/fanotify/fanotify_user.c241
-rw-r--r--fs/notify/fdinfo.c3
-rw-r--r--fs/notify/group.c1
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c9
-rw-r--r--fs/notify/inotify/inotify_user.c7
-rw-r--r--fs/notify/mark.c4
-rw-r--r--fs/notify/notification.c64
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/ocfs2/acl.h4
-rw-r--r--fs/ocfs2/alloc.c4
-rw-r--r--fs/ocfs2/alloc.h4
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/aops.h4
-rw-r--r--fs/ocfs2/blockcheck.c6
-rw-r--r--fs/ocfs2/blockcheck.h4
-rw-r--r--fs/ocfs2/buffer_head_io.c4
-rw-r--r--fs/ocfs2/buffer_head_io.h4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.h4
-rw-r--r--fs/ocfs2/cluster/masklog.c4
-rw-r--r--fs/ocfs2/cluster/masklog.h4
-rw-r--r--fs/ocfs2/cluster/netdebug.c4
-rw-r--r--fs/ocfs2/cluster/nodemanager.c4
-rw-r--r--fs/ocfs2/cluster/nodemanager.h4
-rw-r--r--fs/ocfs2/cluster/ocfs2_heartbeat.h4
-rw-r--r--fs/ocfs2/cluster/ocfs2_nodemanager.h4
-rw-r--r--fs/ocfs2/cluster/quorum.c4
-rw-r--r--fs/ocfs2/cluster/quorum.h4
-rw-r--r--fs/ocfs2/cluster/sys.c4
-rw-r--r--fs/ocfs2/cluster/sys.h4
-rw-r--r--fs/ocfs2/cluster/tcp.c4
-rw-r--r--fs/ocfs2/cluster/tcp.h4
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h4
-rw-r--r--fs/ocfs2/dcache.c4
-rw-r--r--fs/ocfs2/dcache.h4
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/dir.h4
-rw-r--r--fs/ocfs2/dlm/dlmapi.h4
-rw-r--r--fs/ocfs2/dlm/dlmast.c4
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c4
-rw-r--r--fs/ocfs2/dlm/dlmconvert.h4
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c4
-rw-r--r--fs/ocfs2/dlm/dlmdebug.h4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.h4
-rw-r--r--fs/ocfs2/dlm/dlmlock.c4
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c4
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c11
-rw-r--r--fs/ocfs2/dlm/dlmthread.c4
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c4
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/dlmfs/userdlm.c4
-rw-r--r--fs/ocfs2/dlmfs/userdlm.h4
-rw-r--r--fs/ocfs2/dlmglue.c16
-rw-r--r--fs/ocfs2/dlmglue.h4
-rw-r--r--fs/ocfs2/export.c4
-rw-r--r--fs/ocfs2/export.h4
-rw-r--r--fs/ocfs2/extent_map.c4
-rw-r--r--fs/ocfs2/extent_map.h4
-rw-r--r--fs/ocfs2/file.c6
-rw-r--r--fs/ocfs2/file.h4
-rw-r--r--fs/ocfs2/filecheck.c4
-rw-r--r--fs/ocfs2/filecheck.h4
-rw-r--r--fs/ocfs2/heartbeat.c4
-rw-r--r--fs/ocfs2/heartbeat.h4
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/inode.h4
-rw-r--r--fs/ocfs2/ioctl.c59
-rw-r--r--fs/ocfs2/ioctl.h3
-rw-r--r--fs/ocfs2/journal.c4
-rw-r--r--fs/ocfs2/journal.h4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/localalloc.h4
-rw-r--r--fs/ocfs2/locks.c4
-rw-r--r--fs/ocfs2/locks.h4
-rw-r--r--fs/ocfs2/mmap.c4
-rw-r--r--fs/ocfs2/move_extents.c4
-rw-r--r--fs/ocfs2/move_extents.h4
-rw-r--r--fs/ocfs2/namei.c7
-rw-r--r--fs/ocfs2/namei.h4
-rw-r--r--fs/ocfs2/ocfs1_fs_compat.h4
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/ocfs2_fs.h4
-rw-r--r--fs/ocfs2/ocfs2_ioctl.h12
-rw-r--r--fs/ocfs2/ocfs2_lockid.h4
-rw-r--r--fs/ocfs2/ocfs2_lockingver.h4
-rw-r--r--fs/ocfs2/refcounttree.c4
-rw-r--r--fs/ocfs2/refcounttree.h4
-rw-r--r--fs/ocfs2/reservations.c4
-rw-r--r--fs/ocfs2/reservations.h4
-rw-r--r--fs/ocfs2/resize.c4
-rw-r--r--fs/ocfs2/resize.h4
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/slot_map.h4
-rw-r--r--fs/ocfs2/stack_o2cb.c40
-rw-r--r--fs/ocfs2/stack_user.c4
-rw-r--r--fs/ocfs2/stackglue.c6
-rw-r--r--fs/ocfs2/stackglue.h4
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/suballoc.h4
-rw-r--r--fs/ocfs2/super.c4
-rw-r--r--fs/ocfs2/super.h4
-rw-r--r--fs/ocfs2/symlink.c4
-rw-r--r--fs/ocfs2/symlink.h4
-rw-r--r--fs/ocfs2/sysfile.c4
-rw-r--r--fs/ocfs2/sysfile.h4
-rw-r--r--fs/ocfs2/uptodate.c4
-rw-r--r--fs/ocfs2/uptodate.h4
-rw-r--r--fs/ocfs2/xattr.c4
-rw-r--r--fs/ocfs2/xattr.h4
-rw-r--r--fs/openpromfs/inode.c67
-rw-r--r--fs/orangefs/file.c113
-rw-r--r--fs/orangefs/inode.c172
-rw-r--r--fs/orangefs/orangefs-mod.c2
-rw-r--r--fs/orangefs/orangefs-utils.c2
-rw-r--r--fs/overlayfs/copy_up.c3
-rw-r--r--fs/overlayfs/dir.c2
-rw-r--r--fs/overlayfs/file.c142
-rw-r--r--fs/overlayfs/inode.c95
-rw-r--r--fs/overlayfs/namei.c5
-rw-r--r--fs/overlayfs/overlayfs.h42
-rw-r--r--fs/overlayfs/readdir.c16
-rw-r--r--fs/overlayfs/super.c66
-rw-r--r--fs/overlayfs/util.c33
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/generic.c13
-rw-r--r--fs/proc/inode.c18
-rw-r--r--fs/proc/proc_sysctl.c15
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/pstore/platform.c5
-rw-r--r--fs/pstore/ram.c7
-rw-r--r--fs/pstore/ram_core.c18
-rw-r--r--fs/quota/dquot.c6
-rw-r--r--fs/quota/quota.c50
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/ioctl.c121
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/reiserfs/procfs.c10
-rw-r--r--fs/reiserfs/reiserfs.h7
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/seq_file.c18
-rw-r--r--fs/signalfd.c19
-rw-r--r--fs/squashfs/file.c6
-rw-r--r--fs/stat.c8
-rw-r--r--fs/super.c1
-rw-r--r--fs/tracefs/inode.c2
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/ubifs/gc.c7
-rw-r--r--fs/ubifs/ioctl.c78
-rw-r--r--fs/ubifs/replay.c7
-rw-r--r--fs/ubifs/sb.c3
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/ubifs/ubifs.h3
-rw-r--r--fs/udf/namei.c3
-rw-r--r--fs/ufs/super.c3
-rw-r--r--fs/unicode/.gitignore4
-rw-r--r--fs/userfaultfd.c149
-rw-r--r--fs/vboxsf/dir.c4
-rw-r--r--fs/vboxsf/super.c4
-rw-r--r--fs/vboxsf/utils.c68
-rw-r--r--fs/vboxsf/vfsmod.h4
-rw-r--r--fs/verity/Kconfig8
-rw-r--r--fs/xattr.c14
-rw-r--r--fs/xfs/libxfs/xfs_ag.c115
-rw-r--r--fs/xfs/libxfs/xfs_ag.h2
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c52
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c25
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_attr.c54
-rw-r--r--fs/xfs/libxfs/xfs_attr.h1
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c35
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c239
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_btree_staging.c1
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c14
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c12
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c12
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c4
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c58
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h4
-rw-r--r--fs/xfs/libxfs/xfs_format.h5
-rw-r--r--fs/xfs/libxfs/xfs_fs.h2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c127
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h33
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c48
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h20
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h12
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c4
-rw-r--r--fs/xfs/libxfs/xfs_sb.c16
-rw-r--r--fs/xfs/libxfs/xfs_shared.h4
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c24
-rw-r--r--fs/xfs/libxfs/xfs_types.c18
-rw-r--r--fs/xfs/scrub/agheader.c40
-rw-r--r--fs/xfs/scrub/alloc.c5
-rw-r--r--fs/xfs/scrub/attr.c5
-rw-r--r--fs/xfs/scrub/bitmap.c4
-rw-r--r--fs/xfs/scrub/bmap.c20
-rw-r--r--fs/xfs/scrub/btree.c30
-rw-r--r--fs/xfs/scrub/common.c42
-rw-r--r--fs/xfs/scrub/common.h58
-rw-r--r--fs/xfs/scrub/dir.c20
-rw-r--r--fs/xfs/scrub/fscounters.c43
-rw-r--r--fs/xfs/scrub/health.c3
-rw-r--r--fs/xfs/scrub/ialloc.c8
-rw-r--r--fs/xfs/scrub/inode.c5
-rw-r--r--fs/xfs/scrub/parent.c7
-rw-r--r--fs/xfs/scrub/quota.c11
-rw-r--r--fs/xfs/scrub/refcount.c5
-rw-r--r--fs/xfs/scrub/repair.c11
-rw-r--r--fs/xfs/scrub/repair.h6
-rw-r--r--fs/xfs/scrub/rmap.c5
-rw-r--r--fs/xfs/scrub/rtbitmap.c7
-rw-r--r--fs/xfs/scrub/scrub.c42
-rw-r--r--fs/xfs/scrub/scrub.h14
-rw-r--r--fs/xfs/scrub/symlink.c9
-rw-r--r--fs/xfs/scrub/xfs_scrub.h4
-rw-r--r--fs/xfs/xfs_aops.c138
-rw-r--r--fs/xfs/xfs_attr_list.c2
-rw-r--r--fs/xfs/xfs_bmap_item.c4
-rw-r--r--fs/xfs/xfs_bmap_util.c305
-rw-r--r--fs/xfs/xfs_buf.c6
-rw-r--r--fs/xfs/xfs_buf_item.c141
-rw-r--r--fs/xfs/xfs_dir2_readdir.c12
-rw-r--r--fs/xfs/xfs_dquot.c10
-rw-r--r--fs/xfs/xfs_error.c5
-rw-r--r--fs/xfs/xfs_extent_busy.c4
-rw-r--r--fs/xfs/xfs_extent_busy.h3
-rw-r--r--fs/xfs/xfs_extfree_item.c4
-rw-r--r--fs/xfs/xfs_file.c12
-rw-r--r--fs/xfs/xfs_filestream.h2
-rw-r--r--fs/xfs/xfs_fsmap.c14
-rw-r--r--fs/xfs/xfs_fsops.c197
-rw-r--r--fs/xfs/xfs_icache.c35
-rw-r--r--fs/xfs/xfs_inode.c301
-rw-r--r--fs/xfs/xfs_inode.h42
-rw-r--r--fs/xfs/xfs_inode_item.c64
-rw-r--r--fs/xfs/xfs_inode_item_recover.c6
-rw-r--r--fs/xfs/xfs_ioctl.c424
-rw-r--r--fs/xfs/xfs_ioctl.h11
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_ioctl32.h2
-rw-r--r--fs/xfs/xfs_iomap.c27
-rw-r--r--fs/xfs/xfs_iops.c72
-rw-r--r--fs/xfs/xfs_itable.c19
-rw-r--r--fs/xfs/xfs_linux.h2
-rw-r--r--fs/xfs/xfs_log.c10
-rw-r--r--fs/xfs/xfs_log_recover.c13
-rw-r--r--fs/xfs/xfs_message.h2
-rw-r--r--fs/xfs/xfs_mount.c29
-rw-r--r--fs/xfs/xfs_mount.h8
-rw-r--r--fs/xfs/xfs_ondisk.h4
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_qm.c22
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c2
-rw-r--r--fs/xfs/xfs_quotaops.c2
-rw-r--r--fs/xfs/xfs_refcount_item.c4
-rw-r--r--fs/xfs/xfs_reflink.c25
-rw-r--r--fs/xfs/xfs_rmap_item.c4
-rw-r--r--fs/xfs/xfs_rtalloc.c16
-rw-r--r--fs/xfs/xfs_super.c132
-rw-r--r--fs/xfs/xfs_super.h1
-rw-r--r--fs/xfs/xfs_symlink.c32
-rw-r--r--fs/xfs/xfs_trace.h16
-rw-r--r--fs/xfs/xfs_trans.c24
-rw-r--r--fs/xfs/xfs_trans.h15
-rw-r--r--fs/xfs/xfs_xattr.c2
-rw-r--r--fs/zonefs/super.c5
638 files changed, 17503 insertions, 11723 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 39def020a074..cdb99507ef33 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -583,7 +583,7 @@ static struct attribute *v9fs_attrs[] = {
NULL,
};
-static struct attribute_group v9fs_attr_group = {
+static const struct attribute_group v9fs_attr_group = {
.attrs = v9fs_attrs,
};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 649f04f112dc..59c32c9b799f 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -86,8 +86,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
* to work.
*/
writeback_fid = v9fs_writeback_fid(file_dentry(file));
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
+ if (IS_ERR(writeback_fid)) {
+ err = PTR_ERR(writeback_fid);
mutex_unlock(&v9inode->v_mutex);
goto out_error;
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 8d97f0b45e9c..795706520b5e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -399,7 +399,7 @@ static int v9fs_test_inode(struct inode *inode, void *data)
umode = p9mode2unixmode(v9ses, st, &rdev);
/* don't match inode of different type */
- if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+ if (inode_wrong_type(inode, umode))
return 0;
/* compare qid details */
@@ -1390,7 +1390,7 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
* Don't update inode if the file type is different
*/
umode = p9mode2unixmode(v9ses, st, &rdev);
- if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
+ if (inode_wrong_type(inode, umode))
goto out;
/*
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 1dc7af046615..e1c0240b51c0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -59,7 +59,7 @@ static int v9fs_test_inode_dotl(struct inode *inode, void *data)
struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
/* don't match inode of different type */
- if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+ if (inode_wrong_type(inode, st->st_mode))
return 0;
if (inode->i_generation != st->st_gen)
@@ -663,14 +663,10 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
if (stat->st_result_mask & P9_STATS_NLINK)
set_nlink(inode, stat->st_nlink);
if (stat->st_result_mask & P9_STATS_MODE) {
- inode->i_mode = stat->st_mode;
- if ((S_ISBLK(inode->i_mode)) ||
- (S_ISCHR(inode->i_mode)))
- init_special_inode(inode, inode->i_mode,
- inode->i_rdev);
+ mode = stat->st_mode & S_IALLUGO;
+ mode |= inode->i_mode & ~S_IALLUGO;
+ inode->i_mode = mode;
}
- if (stat->st_result_mask & P9_STATS_RDEV)
- inode->i_rdev = new_decode_dev(stat->st_rdev);
if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE) &&
stat->st_result_mask & P9_STATS_SIZE)
v9fs_i_size_write(inode, stat->st_size);
@@ -959,7 +955,7 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
/*
* Don't update inode if the file type is different
*/
- if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
+ if (inode_wrong_type(inode, st->st_mode))
goto out;
/*
diff --git a/fs/Kconfig b/fs/Kconfig
index a55bda4233bb..141a856c50e7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -125,6 +125,7 @@ source "fs/overlayfs/Kconfig"
menu "Caches"
+source "fs/netfs/Kconfig"
source "fs/fscache/Kconfig"
source "fs/cachefiles/Kconfig"
@@ -222,10 +223,13 @@ config TMPFS_INODE64
If unsure, say N.
+config ARCH_SUPPORTS_HUGETLBFS
+ def_bool n
+
config HUGETLBFS
bool "HugeTLB file system support"
depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
- SYS_SUPPORTS_HUGETLBFS || BROKEN
+ ARCH_SUPPORTS_HUGETLBFS || BROKEN
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
@@ -334,8 +338,8 @@ config NFS_COMMON
default y
config NFS_V4_2_SSC_HELPER
- tristate
- default y if NFS_V4=y || NFS_FS=y
+ bool
+ default y if NFS_V4_2
source "net/sunrpc/Kconfig"
source "fs/ceph/Kconfig"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index c6f1c8c1934e..06fb7a93a1bd 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -112,6 +112,9 @@ config BINFMT_FLAT_ARGVP_ENVP_ON_STACK
config BINFMT_FLAT_OLD_ALWAYS_RAM
bool
+config BINFMT_FLAT_NO_DATA_START_OFFSET
+ bool
+
config BINFMT_FLAT_OLD
bool "Enable support for very old legacy flat binaries"
depends on BINFMT_FLAT
diff --git a/fs/Makefile b/fs/Makefile
index 3215fe205256..9c708e1fbe8f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -67,6 +67,7 @@ obj-y += devpts/
obj-$(CONFIG_DLM) += dlm/
# Do not add any filesystems before this line
+obj-$(CONFIG_NETFS_SUPPORT) += netfs/
obj-$(CONFIG_FSCACHE) += fscache/
obj-$(CONFIG_REISERFS_FS) += reiserfs/
obj-$(CONFIG_EXT4_FS) += ext4/
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index 1ad211d72b3b..fc8ba9142f2f 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -4,6 +4,7 @@ config AFS_FS
depends on INET
select AF_RXRPC
select DNS_RESOLVER
+ select NETFS_SUPPORT
help
If you say Y here, you will get an experimental Andrew File System
driver. It currently only supports unsecured read-only AFS access.
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index a4e9e6e07e93..d3c6bb22c5f4 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -322,6 +322,8 @@ static int afs_deliver_cb_callback(struct afs_call *call)
return ret;
call->unmarshall++;
+ fallthrough;
+
case 5:
break;
}
@@ -418,6 +420,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
r->node[loop] = ntohl(b[loop + 5]);
call->unmarshall++;
+ fallthrough;
case 2:
break;
@@ -530,6 +533,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
r->node[loop] = ntohl(b[loop + 5]);
call->unmarshall++;
+ fallthrough;
case 2:
break;
@@ -663,6 +667,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
afs_extract_to_tmp(call);
call->unmarshall++;
+ fallthrough;
case 3:
break;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 17548c1faf02..78719f2f567e 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -103,6 +103,35 @@ struct afs_lookup_cookie {
};
/*
+ * Drop the refs that we're holding on the pages we were reading into. We've
+ * got refs on the first nr_pages pages.
+ */
+static void afs_dir_read_cleanup(struct afs_read *req)
+{
+ struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
+ struct page *page;
+ pgoff_t last = req->nr_pages - 1;
+
+ XA_STATE(xas, &mapping->i_pages, 0);
+
+ if (unlikely(!req->nr_pages))
+ return;
+
+ rcu_read_lock();
+ xas_for_each(&xas, page, last) {
+ if (xas_retry(&xas, page))
+ continue;
+ BUG_ON(xa_is_value(page));
+ BUG_ON(PageCompound(page));
+ ASSERTCMP(page->mapping, ==, mapping);
+
+ put_page(page);
+ }
+
+ rcu_read_unlock();
+}
+
+/*
* check that a directory page is valid
*/
static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
qty /= sizeof(union afs_xdr_dir_block);
/* check them */
- dbuf = kmap(page);
+ dbuf = kmap_atomic(page);
for (tmp = 0; tmp < qty; tmp++) {
if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
}
- kunmap(page);
+ kunmap_atomic(dbuf);
checked:
afs_stat_v(dvnode, n_read_dir);
@@ -157,35 +186,74 @@ error:
}
/*
- * Check the contents of a directory that we've just read.
+ * Dump the contents of a directory.
*/
-static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
+static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
{
struct afs_xdr_dir_page *dbuf;
- unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+ struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+ struct page *page;
+ unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+ pgoff_t last = req->nr_pages - 1;
- for (i = 0; i < req->nr_pages; i++)
- if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
- goto bad;
- return true;
+ XA_STATE(xas, &mapping->i_pages, 0);
-bad:
- pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
+ pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
dvnode->fid.vid, dvnode->fid.vnode,
- req->file_size, req->len, req->actual_len, req->remain);
- pr_warn("DIR %llx %x %x %x\n",
- req->pos, req->index, req->nr_pages, req->offset);
+ req->file_size, req->len, req->actual_len);
+ pr_warn("DIR %llx %x %zx %zx\n",
+ req->pos, req->nr_pages,
+ req->iter->iov_offset, iov_iter_count(req->iter));
- for (i = 0; i < req->nr_pages; i++) {
- dbuf = kmap(req->pages[i]);
- for (j = 0; j < qty; j++) {
- union afs_xdr_dir_block *block = &dbuf->blocks[j];
+ xas_for_each(&xas, page, last) {
+ if (xas_retry(&xas, page))
+ continue;
+
+ BUG_ON(PageCompound(page));
+ BUG_ON(page->mapping != mapping);
+
+ dbuf = kmap_atomic(page);
+ for (i = 0; i < qty; i++) {
+ union afs_xdr_dir_block *block = &dbuf->blocks[i];
- pr_warn("[%02x] %32phN\n", i * qty + j, block);
+ pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
}
- kunmap(req->pages[i]);
+ kunmap_atomic(dbuf);
}
- return false;
+}
+
+/*
+ * Check all the pages in a directory. All the pages are held pinned.
+ */
+static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+{
+ struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+ struct page *page;
+ pgoff_t last = req->nr_pages - 1;
+ int ret = 0;
+
+ XA_STATE(xas, &mapping->i_pages, 0);
+
+ if (unlikely(!req->nr_pages))
+ return 0;
+
+ rcu_read_lock();
+ xas_for_each(&xas, page, last) {
+ if (xas_retry(&xas, page))
+ continue;
+
+ BUG_ON(PageCompound(page));
+ BUG_ON(page->mapping != mapping);
+
+ if (!afs_dir_check_page(dvnode, page, req->file_size)) {
+ afs_dir_dump(dvnode, req);
+ ret = -EIO;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+ return ret;
}
/*
@@ -214,57 +282,57 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
{
struct afs_read *req;
loff_t i_size;
- int nr_pages, nr_inline, i, n;
- int ret = -ENOMEM;
+ int nr_pages, i, n;
+ int ret;
+
+ _enter("");
-retry:
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return ERR_PTR(-ENOMEM);
+
+ refcount_set(&req->usage, 1);
+ req->vnode = dvnode;
+ req->key = key_get(key);
+ req->cleanup = afs_dir_read_cleanup;
+
+expand:
i_size = i_size_read(&dvnode->vfs_inode);
- if (i_size < 2048)
- return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
+ if (i_size < 2048) {
+ ret = afs_bad(dvnode, afs_file_error_dir_small);
+ goto error;
+ }
if (i_size > 2048 * 1024) {
trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
- return ERR_PTR(-EFBIG);
+ ret = -EFBIG;
+ goto error;
}
_enter("%llu", i_size);
- /* Get a request record to hold the page list. We want to hold it
- * inline if we can, but we don't want to make an order 1 allocation.
- */
nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
- nr_inline = nr_pages;
- if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
- nr_inline = 0;
- req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
- if (!req)
- return ERR_PTR(-ENOMEM);
-
- refcount_set(&req->usage, 1);
- req->nr_pages = nr_pages;
req->actual_len = i_size; /* May change */
req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
req->data_version = dvnode->status.data_version; /* May change */
- if (nr_inline > 0) {
- req->pages = req->array;
- } else {
- req->pages = kcalloc(nr_pages, sizeof(struct page *),
- GFP_KERNEL);
- if (!req->pages)
- goto error;
- }
+ iov_iter_xarray(&req->def_iter, READ, &dvnode->vfs_inode.i_mapping->i_pages,
+ 0, i_size);
+ req->iter = &req->def_iter;
- /* Get a list of all the pages that hold or will hold the directory
- * content. We need to fill in any gaps that we might find where the
- * memory reclaimer has been at work. If there are any gaps, we will
+ /* Fill in any gaps that we might find where the memory reclaimer has
+ * been at work and pin all the pages. If there are any gaps, we will
* need to reread the entire directory contents.
*/
- i = 0;
- do {
+ i = req->nr_pages;
+ while (i < nr_pages) {
+ struct page *pages[8], *page;
+
n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
- req->nr_pages - i,
- req->pages + i);
- _debug("find %u at %u/%u", n, i, req->nr_pages);
+ min_t(unsigned int, nr_pages - i,
+ ARRAY_SIZE(pages)),
+ pages);
+ _debug("find %u at %u/%u", n, i, nr_pages);
+
if (n == 0) {
gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
@@ -272,22 +340,24 @@ retry:
afs_stat_v(dvnode, n_inval);
ret = -ENOMEM;
- req->pages[i] = __page_cache_alloc(gfp);
- if (!req->pages[i])
+ page = __page_cache_alloc(gfp);
+ if (!page)
goto error;
- ret = add_to_page_cache_lru(req->pages[i],
+ ret = add_to_page_cache_lru(page,
dvnode->vfs_inode.i_mapping,
i, gfp);
if (ret < 0)
goto error;
- attach_page_private(req->pages[i], (void *)1);
- unlock_page(req->pages[i]);
+ attach_page_private(page, (void *)1);
+ unlock_page(page);
+ req->nr_pages++;
i++;
} else {
+ req->nr_pages += n;
i += n;
}
- } while (i < req->nr_pages);
+ }
/* If we're going to reload, we need to lock all the pages to prevent
* races.
@@ -305,18 +375,23 @@ retry:
if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
trace_afs_reload_dir(dvnode);
- ret = afs_fetch_data(dvnode, key, req);
+ ret = afs_fetch_data(dvnode, req);
if (ret < 0)
goto error_unlock;
task_io_account_read(PAGE_SIZE * req->nr_pages);
- if (req->len < req->file_size)
- goto content_has_grown;
+ if (req->len < req->file_size) {
+ /* The content has grown, so we need to expand the
+ * buffer.
+ */
+ up_write(&dvnode->validate_lock);
+ goto expand;
+ }
/* Validate the data we just read. */
- ret = -EIO;
- if (!afs_dir_check_pages(dvnode, req))
+ ret = afs_dir_check(dvnode, req);
+ if (ret < 0)
goto error_unlock;
// TODO: Trim excess pages
@@ -334,11 +409,6 @@ error:
afs_put_read(req);
_leave(" = %d", ret);
return ERR_PTR(ret);
-
-content_has_grown:
- up_write(&dvnode->validate_lock);
- afs_put_read(req);
- goto retry;
}
/*
@@ -448,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
struct afs_read *req;
struct page *page;
unsigned blkoff, limit;
+ void __rcu **slot;
int ret;
_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -472,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
/* Fetch the appropriate page from the directory and re-add it
- * to the LRU.
+ * to the LRU. We have all the pages pinned with an extra ref.
*/
- page = req->pages[blkoff / PAGE_SIZE];
+ rcu_read_lock();
+ page = NULL;
+ slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
+ blkoff / PAGE_SIZE);
+ if (slot)
+ page = radix_tree_deref_slot(slot);
+ rcu_read_unlock();
if (!page) {
ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
break;
@@ -1342,6 +1419,7 @@ static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
op->file[0].update_ctime = true;
op->dentry = dentry;
op->create.mode = S_IFDIR | mode;
@@ -1423,6 +1501,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
op->file[0].update_ctime = true;
op->dentry = dentry;
@@ -1559,6 +1638,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
op->file[0].update_ctime = true;
/* Try to make sure we have a callback promise on the victim. */
@@ -1641,6 +1721,7 @@ static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
afs_op_set_vnode(op, 0, dvnode);
op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
op->file[0].update_ctime = true;
op->dentry = dentry;
@@ -1715,6 +1796,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
afs_op_set_vnode(op, 0, dvnode);
afs_op_set_vnode(op, 1, vnode);
op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
op->file[0].update_ctime = true;
op->file[1].update_ctime = true;
@@ -1837,7 +1919,9 @@ static void afs_rename_edit_dir(struct afs_operation *op)
new_inode = d_inode(new_dentry);
if (new_inode) {
spin_lock(&new_inode->i_lock);
- if (new_inode->i_nlink > 0)
+ if (S_ISDIR(new_inode->i_mode))
+ clear_nlink(new_inode);
+ else if (new_inode->i_nlink > 0)
drop_nlink(new_inode);
spin_unlock(&new_inode->i_lock);
}
@@ -1910,6 +1994,8 @@ static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
afs_op_set_vnode(op, 1, new_dvnode); /* May be same as orig_dvnode */
op->file[0].dv_delta = 1;
op->file[1].dv_delta = 1;
+ op->file[0].modification = true;
+ op->file[1].modification = true;
op->file[0].update_ctime = true;
op->file[1].update_ctime = true;
@@ -2006,6 +2092,6 @@ static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
afs_stat_v(dvnode, n_inval);
/* we clean up only if the entire page is being invalidated */
- if (offset == 0 && length == PAGE_SIZE)
+ if (offset == 0 && length == thp_size(page))
detach_page_private(page);
}
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index 04f75a44f243..dae9a57d7ec0 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -73,6 +73,8 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
afs_op_set_vnode(op, 1, dvnode);
op->file[0].dv_delta = 1;
op->file[1].dv_delta = 1;
+ op->file[0].modification = true;
+ op->file[1].modification = true;
op->file[0].update_ctime = true;
op->file[1].update_ctime = true;
@@ -201,6 +203,7 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
afs_op_set_vnode(op, 0, dvnode);
afs_op_set_vnode(op, 1, vnode);
op->file[0].dv_delta = 1;
+ op->file[0].modification = true;
op->file[0].update_ctime = true;
op->file[1].op_unlinked = true;
op->file[1].update_ctime = true;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 960b64268623..db035ae2a134 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -14,6 +14,7 @@
#include <linux/gfp.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/mm.h>
+#include <linux/netfs.h>
#include "internal.h"
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
@@ -22,8 +23,7 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
static int afs_releasepage(struct page *page, gfp_t gfp_flags);
-static int afs_readpages(struct file *filp, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages);
+static void afs_readahead(struct readahead_control *ractl);
const struct file_operations afs_file_operations = {
.open = afs_open,
@@ -47,7 +47,7 @@ const struct inode_operations afs_file_inode_operations = {
const struct address_space_operations afs_fs_aops = {
.readpage = afs_readpage,
- .readpages = afs_readpages,
+ .readahead = afs_readahead,
.set_page_dirty = afs_set_page_dirty,
.launder_page = afs_launder_page,
.releasepage = afs_releasepage,
@@ -184,41 +184,50 @@ int afs_release(struct inode *inode, struct file *file)
}
/*
+ * Allocate a new read record.
+ */
+struct afs_read *afs_alloc_read(gfp_t gfp)
+{
+ struct afs_read *req;
+
+ req = kzalloc(sizeof(struct afs_read), gfp);
+ if (req)
+ refcount_set(&req->usage, 1);
+
+ return req;
+}
+
+/*
* Dispose of a ref to a read record.
*/
void afs_put_read(struct afs_read *req)
{
- int i;
-
if (refcount_dec_and_test(&req->usage)) {
- if (req->pages) {
- for (i = 0; i < req->nr_pages; i++)
- if (req->pages[i])
- put_page(req->pages[i]);
- if (req->pages != req->array)
- kfree(req->pages);
- }
+ if (req->cleanup)
+ req->cleanup(req);
+ key_put(req->key);
kfree(req);
}
}
-#ifdef CONFIG_AFS_FSCACHE
-/*
- * deal with notification that a page was read from the cache
- */
-static void afs_file_readpage_read_complete(struct page *page,
- void *data,
- int error)
+static void afs_fetch_data_notify(struct afs_operation *op)
{
- _enter("%p,%p,%d", page, data, error);
-
- /* if the read completes with an error, we just unlock the page and let
- * the VM reissue the readpage */
- if (!error)
- SetPageUptodate(page);
- unlock_page(page);
+ struct afs_read *req = op->fetch.req;
+ struct netfs_read_subrequest *subreq = req->subreq;
+ int error = op->error;
+
+ if (error == -ECONNABORTED)
+ error = afs_abort_to_error(op->ac.abort_code);
+ req->error = error;
+
+ if (subreq) {
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
+ req->subreq = NULL;
+ } else if (req->done) {
+ req->done(req);
+ }
}
-#endif
static void afs_fetch_data_success(struct afs_operation *op)
{
@@ -228,10 +237,12 @@ static void afs_fetch_data_success(struct afs_operation *op)
afs_vnode_commit_status(op, &op->file[0]);
afs_stat_v(vnode, n_fetches);
atomic_long_add(op->fetch.req->actual_len, &op->net->n_fetch_bytes);
+ afs_fetch_data_notify(op);
}
static void afs_fetch_data_put(struct afs_operation *op)
{
+ op->fetch.req->error = op->error;
afs_put_read(op->fetch.req);
}
@@ -240,13 +251,14 @@ static const struct afs_operation_ops afs_fetch_data_operation = {
.issue_yfs_rpc = yfs_fs_fetch_data,
.success = afs_fetch_data_success,
.aborted = afs_check_for_remote_deletion,
+ .failed = afs_fetch_data_notify,
.put = afs_fetch_data_put,
};
/*
* Fetch file data from the volume.
*/
-int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *req)
+int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
{
struct afs_operation *op;
@@ -255,11 +267,14 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *re
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
- key_serial(key));
+ key_serial(req->key));
- op = afs_alloc_operation(key, vnode->volume);
- if (IS_ERR(op))
+ op = afs_alloc_operation(req->key, vnode->volume);
+ if (IS_ERR(op)) {
+ if (req->subreq)
+ netfs_subreq_terminated(req->subreq, PTR_ERR(op), false);
return PTR_ERR(op);
+ }
afs_op_set_vnode(op, 0, vnode);
@@ -268,336 +283,103 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *re
return afs_do_sync_operation(op);
}
-/*
- * read page from file, directory or symlink, given a key to use
- */
-int afs_page_filler(void *data, struct page *page)
+static void afs_req_issue_op(struct netfs_read_subrequest *subreq)
{
- struct inode *inode = page->mapping->host;
- struct afs_vnode *vnode = AFS_FS_I(inode);
- struct afs_read *req;
- struct key *key = data;
- int ret;
-
- _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
+ struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
+ struct afs_read *fsreq;
- BUG_ON(!PageLocked(page));
+ fsreq = afs_alloc_read(GFP_NOFS);
+ if (!fsreq)
+ return netfs_subreq_terminated(subreq, -ENOMEM, false);
- ret = -ESTALE;
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
- goto error;
+ fsreq->subreq = subreq;
+ fsreq->pos = subreq->start + subreq->transferred;
+ fsreq->len = subreq->len - subreq->transferred;
+ fsreq->key = subreq->rreq->netfs_priv;
+ fsreq->vnode = vnode;
+ fsreq->iter = &fsreq->def_iter;
- /* is it cached? */
-#ifdef CONFIG_AFS_FSCACHE
- ret = fscache_read_or_alloc_page(vnode->cache,
- page,
- afs_file_readpage_read_complete,
- NULL,
- GFP_KERNEL);
-#else
- ret = -ENOBUFS;
-#endif
- switch (ret) {
- /* read BIO submitted (page in cache) */
- case 0:
- break;
-
- /* page not yet cached */
- case -ENODATA:
- _debug("cache said ENODATA");
- goto go_on;
-
- /* page will not be cached */
- case -ENOBUFS:
- _debug("cache said ENOBUFS");
-
- fallthrough;
- default:
- go_on:
- req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
- if (!req)
- goto enomem;
-
- /* We request a full page. If the page is a partial one at the
- * end of the file, the server will return a short read and the
- * unmarshalling code will clear the unfilled space.
- */
- refcount_set(&req->usage, 1);
- req->pos = (loff_t)page->index << PAGE_SHIFT;
- req->len = PAGE_SIZE;
- req->nr_pages = 1;
- req->pages = req->array;
- req->pages[0] = page;
- get_page(page);
-
- /* read the contents of the file from the server into the
- * page */
- ret = afs_fetch_data(vnode, key, req);
- afs_put_read(req);
-
- if (ret < 0) {
- if (ret == -ENOENT) {
- _debug("got NOENT from server"
- " - marking file deleted and stale");
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
- ret = -ESTALE;
- }
+ iov_iter_xarray(&fsreq->def_iter, READ,
+ &fsreq->vnode->vfs_inode.i_mapping->i_pages,
+ fsreq->pos, fsreq->len);
-#ifdef CONFIG_AFS_FSCACHE
- fscache_uncache_page(vnode->cache, page);
-#endif
- BUG_ON(PageFsCache(page));
-
- if (ret == -EINTR ||
- ret == -ENOMEM ||
- ret == -ERESTARTSYS ||
- ret == -EAGAIN)
- goto error;
- goto io_error;
- }
+ afs_fetch_data(fsreq->vnode, fsreq);
+}
- SetPageUptodate(page);
+static int afs_symlink_readpage(struct page *page)
+{
+ struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+ struct afs_read *fsreq;
+ int ret;
- /* send the page to the cache */
-#ifdef CONFIG_AFS_FSCACHE
- if (PageFsCache(page) &&
- fscache_write_page(vnode->cache, page, vnode->status.size,
- GFP_KERNEL) != 0) {
- fscache_uncache_page(vnode->cache, page);
- BUG_ON(PageFsCache(page));
- }
-#endif
- unlock_page(page);
- }
+ fsreq = afs_alloc_read(GFP_NOFS);
+ if (!fsreq)
+ return -ENOMEM;
- _leave(" = 0");
- return 0;
+ fsreq->pos = page->index * PAGE_SIZE;
+ fsreq->len = PAGE_SIZE;
+ fsreq->vnode = vnode;
+ fsreq->iter = &fsreq->def_iter;
+ iov_iter_xarray(&fsreq->def_iter, READ, &page->mapping->i_pages,
+ fsreq->pos, fsreq->len);
-io_error:
- SetPageError(page);
- goto error;
-enomem:
- ret = -ENOMEM;
-error:
- unlock_page(page);
- _leave(" = %d", ret);
+ ret = afs_fetch_data(fsreq->vnode, fsreq);
+ page_endio(page, false, ret);
return ret;
}
-/*
- * read page from file, directory or symlink, given a file to nominate the key
- * to be used
- */
-static int afs_readpage(struct file *file, struct page *page)
+static void afs_init_rreq(struct netfs_read_request *rreq, struct file *file)
{
- struct key *key;
- int ret;
-
- if (file) {
- key = afs_file_key(file);
- ASSERT(key != NULL);
- ret = afs_page_filler(key, page);
- } else {
- struct inode *inode = page->mapping->host;
- key = afs_request_key(AFS_FS_S(inode->i_sb)->cell);
- if (IS_ERR(key)) {
- ret = PTR_ERR(key);
- } else {
- ret = afs_page_filler(key, page);
- key_put(key);
- }
- }
- return ret;
+ rreq->netfs_priv = key_get(afs_file_key(file));
}
-/*
- * Make pages available as they're filled.
- */
-static void afs_readpages_page_done(struct afs_read *req)
+static bool afs_is_cache_enabled(struct inode *inode)
{
-#ifdef CONFIG_AFS_FSCACHE
- struct afs_vnode *vnode = req->vnode;
-#endif
- struct page *page = req->pages[req->index];
+ struct fscache_cookie *cookie = afs_vnode_cache(AFS_FS_I(inode));
- req->pages[req->index] = NULL;
- SetPageUptodate(page);
-
- /* send the page to the cache */
-#ifdef CONFIG_AFS_FSCACHE
- if (PageFsCache(page) &&
- fscache_write_page(vnode->cache, page, vnode->status.size,
- GFP_KERNEL) != 0) {
- fscache_uncache_page(vnode->cache, page);
- BUG_ON(PageFsCache(page));
- }
-#endif
- unlock_page(page);
- put_page(page);
+ return fscache_cookie_enabled(cookie) && !hlist_empty(&cookie->backing_objects);
}
-/*
- * Read a contiguous set of pages.
- */
-static int afs_readpages_one(struct file *file, struct address_space *mapping,
- struct list_head *pages)
+static int afs_begin_cache_operation(struct netfs_read_request *rreq)
{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- struct afs_read *req;
- struct list_head *p;
- struct page *first, *page;
- struct key *key = afs_file_key(file);
- pgoff_t index;
- int ret, n, i;
-
- /* Count the number of contiguous pages at the front of the list. Note
- * that the list goes prev-wards rather than next-wards.
- */
- first = lru_to_page(pages);
- index = first->index + 1;
- n = 1;
- for (p = first->lru.prev; p != pages; p = p->prev) {
- page = list_entry(p, struct page, lru);
- if (page->index != index)
- break;
- index++;
- n++;
- }
-
- req = kzalloc(struct_size(req, array, n), GFP_NOFS);
- if (!req)
- return -ENOMEM;
-
- refcount_set(&req->usage, 1);
- req->vnode = vnode;
- req->page_done = afs_readpages_page_done;
- req->pos = first->index;
- req->pos <<= PAGE_SHIFT;
- req->pages = req->array;
-
- /* Transfer the pages to the request. We add them in until one fails
- * to add to the LRU and then we stop (as that'll make a hole in the
- * contiguous run.
- *
- * Note that it's possible for the file size to change whilst we're
- * doing this, but we rely on the server returning less than we asked
- * for if the file shrank. We also rely on this to deal with a partial
- * page at the end of the file.
- */
- do {
- page = lru_to_page(pages);
- list_del(&page->lru);
- index = page->index;
- if (add_to_page_cache_lru(page, mapping, index,
- readahead_gfp_mask(mapping))) {
-#ifdef CONFIG_AFS_FSCACHE
- fscache_uncache_page(vnode->cache, page);
-#endif
- put_page(page);
- break;
- }
-
- req->pages[req->nr_pages++] = page;
- req->len += PAGE_SIZE;
- } while (req->nr_pages < n);
+ struct afs_vnode *vnode = AFS_FS_I(rreq->inode);
- if (req->nr_pages == 0) {
- kfree(req);
- return 0;
- }
-
- ret = afs_fetch_data(vnode, key, req);
- if (ret < 0)
- goto error;
-
- task_io_account_read(PAGE_SIZE * req->nr_pages);
- afs_put_read(req);
- return 0;
-
-error:
- if (ret == -ENOENT) {
- _debug("got NOENT from server"
- " - marking file deleted and stale");
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
- ret = -ESTALE;
- }
-
- for (i = 0; i < req->nr_pages; i++) {
- page = req->pages[i];
- if (page) {
-#ifdef CONFIG_AFS_FSCACHE
- fscache_uncache_page(vnode->cache, page);
-#endif
- SetPageError(page);
- unlock_page(page);
- }
- }
-
- afs_put_read(req);
- return ret;
+ return fscache_begin_read_operation(rreq, afs_vnode_cache(vnode));
}
-/*
- * read a set of pages
- */
-static int afs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
+ struct page *page, void **_fsdata)
{
- struct key *key = afs_file_key(file);
- struct afs_vnode *vnode;
- int ret = 0;
-
- _enter("{%d},{%lu},,%d",
- key_serial(key), mapping->host->i_ino, nr_pages);
+ struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
- ASSERT(key != NULL);
+ return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
+}
- vnode = AFS_FS_I(mapping->host);
- if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
- _leave(" = -ESTALE");
- return -ESTALE;
- }
+static void afs_priv_cleanup(struct address_space *mapping, void *netfs_priv)
+{
+ key_put(netfs_priv);
+}
- /* attempt to read as many of the pages as possible */
-#ifdef CONFIG_AFS_FSCACHE
- ret = fscache_read_or_alloc_pages(vnode->cache,
- mapping,
- pages,
- &nr_pages,
- afs_file_readpage_read_complete,
- NULL,
- mapping_gfp_mask(mapping));
-#else
- ret = -ENOBUFS;
-#endif
+const struct netfs_read_request_ops afs_req_ops = {
+ .init_rreq = afs_init_rreq,
+ .is_cache_enabled = afs_is_cache_enabled,
+ .begin_cache_operation = afs_begin_cache_operation,
+ .check_write_begin = afs_check_write_begin,
+ .issue_op = afs_req_issue_op,
+ .cleanup = afs_priv_cleanup,
+};
- switch (ret) {
- /* all pages are being read from the cache */
- case 0:
- BUG_ON(!list_empty(pages));
- BUG_ON(nr_pages != 0);
- _leave(" = 0 [reading all]");
- return 0;
-
- /* there were pages that couldn't be read from the cache */
- case -ENODATA:
- case -ENOBUFS:
- break;
-
- /* other error */
- default:
- _leave(" = %d", ret);
- return ret;
- }
+static int afs_readpage(struct file *file, struct page *page)
+{
+ if (!file)
+ return afs_symlink_readpage(page);
- while (!list_empty(pages)) {
- ret = afs_readpages_one(file, mapping, pages);
- if (ret < 0)
- break;
- }
+ return netfs_readpage(file, page, &afs_req_ops, NULL);
+}
- _leave(" = %d [netting]", ret);
- return ret;
+static void afs_readahead(struct readahead_control *ractl)
+{
+ netfs_readahead(ractl, &afs_req_ops, NULL);
}
/*
@@ -625,8 +407,8 @@ static void afs_invalidate_dirty(struct page *page, unsigned int offset,
return;
/* We may need to shorten the dirty region */
- f = afs_page_dirty_from(priv);
- t = afs_page_dirty_to(priv);
+ f = afs_page_dirty_from(page, priv);
+ t = afs_page_dirty_to(page, priv);
if (t <= offset || f >= end)
return; /* Doesn't overlap */
@@ -644,17 +426,17 @@ static void afs_invalidate_dirty(struct page *page, unsigned int offset,
if (f == t)
goto undirty;
- priv = afs_page_dirty(f, t);
+ priv = afs_page_dirty(page, f, t);
set_page_private(page, priv);
- trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page->index, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page);
return;
undirty:
- trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page->index, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page);
clear_page_dirty_for_io(page);
full_invalidate:
- priv = (unsigned long)detach_page_private(page);
- trace_afs_page_dirty(vnode, tracepoint_string("inval"), page->index, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("inval"), page);
+ detach_page_private(page);
}
/*
@@ -669,20 +451,10 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
BUG_ON(!PageLocked(page));
-#ifdef CONFIG_AFS_FSCACHE
- /* we clean up only if the entire page is being invalidated */
- if (offset == 0 && length == PAGE_SIZE) {
- if (PageFsCache(page)) {
- struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
- fscache_wait_on_page_write(vnode->cache, page);
- fscache_uncache_page(vnode->cache, page);
- }
- }
-#endif
-
if (PagePrivate(page))
afs_invalidate_dirty(page, offset, length);
+ wait_on_page_fscache(page);
_leave("");
}
@@ -693,7 +465,6 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
static int afs_releasepage(struct page *page, gfp_t gfp_flags)
{
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
- unsigned long priv;
_enter("{{%llx:%llu}[%lu],%lx},%x",
vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
@@ -702,16 +473,16 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
/* deny if page is being written to the cache and the caller hasn't
* elected to wait */
#ifdef CONFIG_AFS_FSCACHE
- if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) {
- _leave(" = F [cache busy]");
- return 0;
+ if (PageFsCache(page)) {
+ if (!(gfp_flags & __GFP_DIRECT_RECLAIM) || !(gfp_flags & __GFP_FS))
+ return false;
+ wait_on_page_fscache(page);
}
#endif
if (PagePrivate(page)) {
- priv = (unsigned long)detach_page_private(page);
- trace_afs_page_dirty(vnode, tracepoint_string("rel"),
- page->index, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("rel"), page);
+ detach_page_private(page);
}
/* indicate that the page can be released */
diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c
index 71c58723763d..d222dfbe976b 100644
--- a/fs/afs/fs_operation.c
+++ b/fs/afs/fs_operation.c
@@ -118,6 +118,8 @@ static void afs_prepare_vnode(struct afs_operation *op, struct afs_vnode_param *
vp->cb_break_before = afs_calc_vnode_cb_break(vnode);
if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
op->flags |= AFS_OPERATION_CUR_ONLY;
+ if (vp->modification)
+ set_bit(AFS_VNODE_MODIFYING, &vnode->flags);
}
if (vp->fid.vnode)
@@ -198,8 +200,10 @@ void afs_wait_for_operation(struct afs_operation *op)
case -ECONNABORTED:
if (op->ops->aborted)
op->ops->aborted(op);
- break;
+ fallthrough;
default:
+ if (op->ops->failed)
+ op->ops->failed(op);
break;
}
@@ -223,6 +227,10 @@ int afs_put_operation(struct afs_operation *op)
if (op->ops && op->ops->put)
op->ops->put(op);
+ if (op->file[0].modification)
+ clear_bit(AFS_VNODE_MODIFYING, &op->file[0].vnode->flags);
+ if (op->file[1].modification && op->file[1].vnode != op->file[0].vnode)
+ clear_bit(AFS_VNODE_MODIFYING, &op->file[1].vnode->flags);
if (op->file[0].put_vnode)
iput(&op->file[0].vnode->vfs_inode);
if (op->file[1].put_vnode)
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 1d95ed9dd86e..dd3f45d906d2 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -10,6 +10,7 @@
#include <linux/sched.h>
#include <linux/circ_buf.h>
#include <linux/iversion.h>
+#include <linux/netfs.h>
#include "internal.h"
#include "afs_fs.h"
#include "xdr_fs.h"
@@ -302,17 +303,15 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
struct afs_vnode_param *vp = &op->file[0];
struct afs_read *req = op->fetch.req;
const __be32 *bp;
- unsigned int size;
int ret;
- _enter("{%u,%zu/%llu}",
- call->unmarshall, iov_iter_count(call->iter), req->actual_len);
+ _enter("{%u,%zu,%zu/%llu}",
+ call->unmarshall, call->iov_len, iov_iter_count(call->iter),
+ req->actual_len);
switch (call->unmarshall) {
case 0:
req->actual_len = 0;
- req->index = 0;
- req->offset = req->pos & (PAGE_SIZE - 1);
call->unmarshall++;
if (call->operation_ID == FSFETCHDATA64) {
afs_extract_to_tmp64(call);
@@ -322,7 +321,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
}
fallthrough;
- /* extract the returned data length */
+ /* Extract the returned data length into
+ * ->actual_len. This may indicate more or less data than was
+ * requested will be returned.
+ */
case 1:
_debug("extract data length");
ret = afs_extract_data(call, true);
@@ -331,44 +333,25 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
req->actual_len = be64_to_cpu(call->tmp64);
_debug("DATA length: %llu", req->actual_len);
- req->remain = min(req->len, req->actual_len);
- if (req->remain == 0)
+
+ if (req->actual_len == 0)
goto no_more_data;
+ call->iter = req->iter;
+ call->iov_len = min(req->actual_len, req->len);
call->unmarshall++;
-
- begin_page:
- ASSERTCMP(req->index, <, req->nr_pages);
- if (req->remain > PAGE_SIZE - req->offset)
- size = PAGE_SIZE - req->offset;
- else
- size = req->remain;
- call->bvec[0].bv_len = size;
- call->bvec[0].bv_offset = req->offset;
- call->bvec[0].bv_page = req->pages[req->index];
- iov_iter_bvec(&call->def_iter, READ, call->bvec, 1, size);
- ASSERTCMP(size, <=, PAGE_SIZE);
fallthrough;
/* extract the returned data */
case 2:
_debug("extract data %zu/%llu",
- iov_iter_count(call->iter), req->remain);
+ iov_iter_count(call->iter), req->actual_len);
ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
- req->remain -= call->bvec[0].bv_len;
- req->offset += call->bvec[0].bv_len;
- ASSERTCMP(req->offset, <=, PAGE_SIZE);
- if (req->offset == PAGE_SIZE) {
- req->offset = 0;
- req->index++;
- if (req->remain > 0)
- goto begin_page;
- }
- ASSERTCMP(req->remain, ==, 0);
+ call->iter = &call->def_iter;
if (req->actual_len <= req->len)
goto no_more_data;
@@ -405,22 +388,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
req->file_size = vp->scb.status.size;
call->unmarshall++;
+ fallthrough;
case 5:
break;
}
- for (; req->index < req->nr_pages; req->index++) {
- if (req->offset < PAGE_SIZE)
- zero_user_segment(req->pages[req->index],
- req->offset, PAGE_SIZE);
- req->offset = 0;
- }
-
- if (req->page_done)
- for (req->index = 0; req->index < req->nr_pages; req->index++)
- req->page_done(req);
-
_leave(" = 0 [done]");
return 0;
}
@@ -494,6 +467,8 @@ void afs_fs_fetch_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
+ req->call_debug_id = call->debug_id;
+
/* marshall the parameters */
bp = call->request;
bp[0] = htonl(FSFETCHDATA);
@@ -1079,8 +1054,7 @@ static const struct afs_call_type afs_RXFSStoreData64 = {
/*
* store a set of pages to a very large file
*/
-static void afs_fs_store_data64(struct afs_operation *op,
- loff_t pos, loff_t size, loff_t i_size)
+static void afs_fs_store_data64(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
struct afs_call *call;
@@ -1095,7 +1069,7 @@ static void afs_fs_store_data64(struct afs_operation *op,
if (!call)
return afs_op_nomem(op);
- call->send_pages = true;
+ call->write_iter = op->store.write_iter;
/* marshall the parameters */
bp = call->request;
@@ -1111,47 +1085,38 @@ static void afs_fs_store_data64(struct afs_operation *op,
*bp++ = 0; /* unix mode */
*bp++ = 0; /* segment size */
- *bp++ = htonl(upper_32_bits(pos));
- *bp++ = htonl(lower_32_bits(pos));
- *bp++ = htonl(upper_32_bits(size));
- *bp++ = htonl(lower_32_bits(size));
- *bp++ = htonl(upper_32_bits(i_size));
- *bp++ = htonl(lower_32_bits(i_size));
+ *bp++ = htonl(upper_32_bits(op->store.pos));
+ *bp++ = htonl(lower_32_bits(op->store.pos));
+ *bp++ = htonl(upper_32_bits(op->store.size));
+ *bp++ = htonl(lower_32_bits(op->store.size));
+ *bp++ = htonl(upper_32_bits(op->store.i_size));
+ *bp++ = htonl(lower_32_bits(op->store.i_size));
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
}
/*
- * store a set of pages
+ * Write data to a file on the server.
*/
void afs_fs_store_data(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
struct afs_call *call;
- loff_t size, pos, i_size;
__be32 *bp;
_enter(",%x,{%llx:%llu},,",
key_serial(op->key), vp->fid.vid, vp->fid.vnode);
- size = (loff_t)op->store.last_to - (loff_t)op->store.first_offset;
- if (op->store.first != op->store.last)
- size += (loff_t)(op->store.last - op->store.first) << PAGE_SHIFT;
- pos = (loff_t)op->store.first << PAGE_SHIFT;
- pos += op->store.first_offset;
-
- i_size = i_size_read(&vp->vnode->vfs_inode);
- if (pos + size > i_size)
- i_size = size + pos;
-
_debug("size %llx, at %llx, i_size %llx",
- (unsigned long long) size, (unsigned long long) pos,
- (unsigned long long) i_size);
+ (unsigned long long)op->store.size,
+ (unsigned long long)op->store.pos,
+ (unsigned long long)op->store.i_size);
- if (upper_32_bits(pos) || upper_32_bits(i_size) || upper_32_bits(size) ||
- upper_32_bits(pos + size))
- return afs_fs_store_data64(op, pos, size, i_size);
+ if (upper_32_bits(op->store.pos) ||
+ upper_32_bits(op->store.size) ||
+ upper_32_bits(op->store.i_size))
+ return afs_fs_store_data64(op);
call = afs_alloc_flat_call(op->net, &afs_RXFSStoreData,
(4 + 6 + 3) * 4,
@@ -1159,7 +1124,7 @@ void afs_fs_store_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
- call->send_pages = true;
+ call->write_iter = op->store.write_iter;
/* marshall the parameters */
bp = call->request;
@@ -1175,9 +1140,9 @@ void afs_fs_store_data(struct afs_operation *op)
*bp++ = 0; /* unix mode */
*bp++ = 0; /* segment size */
- *bp++ = htonl(lower_32_bits(pos));
- *bp++ = htonl(lower_32_bits(size));
- *bp++ = htonl(lower_32_bits(i_size));
+ *bp++ = htonl(lower_32_bits(op->store.pos));
+ *bp++ = htonl(lower_32_bits(op->store.size));
+ *bp++ = htonl(lower_32_bits(op->store.i_size));
trace_afs_make_fs_call(call, &vp->fid);
afs_make_op_call(op, call, GFP_NOFS);
@@ -1444,6 +1409,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
_debug("motd '%s'", p);
call->unmarshall++;
+ fallthrough;
case 8:
break;
@@ -1881,6 +1847,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
xdr_decode_AFSVolSync(&bp, &op->volsync);
call->unmarshall++;
+ fallthrough;
case 6:
break;
@@ -2015,6 +1982,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
xdr_decode_AFSVolSync(&bp, &op->volsync);
call->unmarshall++;
+ fallthrough;
case 4:
break;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 12be88716e4c..80b6c8d967d5 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -102,13 +102,13 @@ static int afs_inode_init_from_status(struct afs_operation *op,
switch (status->type) {
case AFS_FTYPE_FILE:
- inode->i_mode = S_IFREG | status->mode;
+ inode->i_mode = S_IFREG | (status->mode & S_IALLUGO);
inode->i_op = &afs_file_inode_operations;
inode->i_fop = &afs_file_operations;
inode->i_mapping->a_ops = &afs_fs_aops;
break;
case AFS_FTYPE_DIR:
- inode->i_mode = S_IFDIR | status->mode;
+ inode->i_mode = S_IFDIR | (status->mode & S_IALLUGO);
inode->i_op = &afs_dir_inode_operations;
inode->i_fop = &afs_dir_file_operations;
inode->i_mapping->a_ops = &afs_dir_aops;
@@ -198,7 +198,7 @@ static void afs_apply_status(struct afs_operation *op,
if (status->mode != vnode->status.mode) {
mode = inode->i_mode;
mode &= ~S_IALLUGO;
- mode |= status->mode;
+ mode |= status->mode & S_IALLUGO;
WRITE_ONCE(inode->i_mode, mode);
}
@@ -214,11 +214,12 @@ static void afs_apply_status(struct afs_operation *op,
if (vp->dv_before + vp->dv_delta != status->data_version) {
if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
- pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n",
+ pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n",
vnode->fid.vid, vnode->fid.vnode,
(unsigned long long)vp->dv_before + vp->dv_delta,
(unsigned long long)status->data_version,
- op->type ? op->type->name : "???");
+ op->type ? op->type->name : "???",
+ op->debug_id);
vnode->invalid_before = status->data_version;
if (vnode->status.type == AFS_FTYPE_DIR) {
@@ -293,8 +294,9 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
}
} else if (vp->scb.have_status) {
- if (vp->dv_before + vp->dv_delta != vp->scb.status.data_version &&
- vp->speculative)
+ if (vp->speculative &&
+ (test_bit(AFS_VNODE_MODIFYING, &vnode->flags) ||
+ vp->dv_before != vnode->status.data_version))
/* Ignore the result of a speculative bulk status fetch
* if it splits around a modification op, thereby
* appearing to regress the data version.
@@ -427,7 +429,7 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
} __packed key;
struct afs_vnode_cache_aux aux;
- if (vnode->status.type == AFS_FTYPE_DIR) {
+ if (vnode->status.type != AFS_FTYPE_FILE) {
vnode->cache = NULL;
return;
}
@@ -910,6 +912,7 @@ int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
op->ctime = attr->ia_ctime;
op->file[0].update_ctime = 1;
+ op->file[0].modification = true;
op->ops = &afs_setattr_operation;
ret = afs_do_sync_operation(op);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1627b1872812..5ed416f4ff33 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -14,6 +14,7 @@
#include <linux/key.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
+#define FSCACHE_USE_NEW_IO_API
#include <linux/fscache.h>
#include <linux/backing-dev.h>
#include <linux/uuid.h>
@@ -31,6 +32,7 @@
struct pagevec;
struct afs_call;
+struct afs_vnode;
/*
* Partial file-locking emulation mode. (The problem being that AFS3 only
@@ -104,7 +106,9 @@ struct afs_call {
struct afs_server *server; /* The fileserver record if fs op (pins ref) */
struct afs_vlserver *vlserver; /* The vlserver record if vl op */
void *request; /* request data (first part) */
+ size_t iov_len; /* Size of *iter to be used */
struct iov_iter def_iter; /* Default buffer/data iterator */
+ struct iov_iter *write_iter; /* Iterator defining write to be made */
struct iov_iter *iter; /* Iterator currently in use */
union { /* Convenience for ->def_iter */
struct kvec kvec[1];
@@ -131,7 +135,6 @@ struct afs_call {
unsigned char unmarshall; /* unmarshalling phase */
unsigned char addr_ix; /* Address in ->alist */
bool drop_ref; /* T if need to drop ref for incoming call */
- bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
bool upgrade; /* T to request service upgrade */
@@ -202,17 +205,19 @@ struct afs_read {
loff_t pos; /* Where to start reading */
loff_t len; /* How much we're asking for */
loff_t actual_len; /* How much we're actually getting */
- loff_t remain; /* Amount remaining */
loff_t file_size; /* File size returned by server */
+ struct key *key; /* The key to use to reissue the read */
+ struct afs_vnode *vnode; /* The file being read into. */
+ struct netfs_read_subrequest *subreq; /* Fscache helper read request this belongs to */
afs_dataversion_t data_version; /* Version number returned by server */
refcount_t usage;
- unsigned int index; /* Which page we're reading into */
+ unsigned int call_debug_id;
unsigned int nr_pages;
- unsigned int offset; /* offset into current page */
- struct afs_vnode *vnode;
- void (*page_done)(struct afs_read *);
- struct page **pages;
- struct page *array[];
+ int error;
+ void (*done)(struct afs_read *);
+ void (*cleanup)(struct afs_read *);
+ struct iov_iter *iter; /* Iterator representing the buffer */
+ struct iov_iter def_iter; /* Default iterator */
};
/*
@@ -640,6 +645,7 @@ struct afs_vnode {
#define AFS_VNODE_PSEUDODIR 7 /* set if Vnode is a pseudo directory */
#define AFS_VNODE_NEW_CONTENT 8 /* Set if file has new content (create/trunc-0) */
#define AFS_VNODE_SILLY_DELETED 9 /* Set if file has been silly-deleted */
+#define AFS_VNODE_MODIFYING 10 /* Set if we're performing a modification op */
struct list_head wb_keys; /* List of keys available for writeback */
struct list_head pending_locks; /* locks waiting to be granted */
@@ -739,6 +745,7 @@ struct afs_operation_ops {
void (*issue_yfs_rpc)(struct afs_operation *op);
void (*success)(struct afs_operation *op);
void (*aborted)(struct afs_operation *op);
+ void (*failed)(struct afs_operation *op);
void (*edit_dir)(struct afs_operation *op);
void (*put)(struct afs_operation *op);
};
@@ -756,6 +763,7 @@ struct afs_vnode_param {
bool set_size:1; /* Must update i_size */
bool op_unlinked:1; /* True if file was unlinked by op */
bool speculative:1; /* T if speculative status fetch (no vnode lock) */
+ bool modification:1; /* Set if the content gets modified */
};
/*
@@ -808,12 +816,11 @@ struct afs_operation {
afs_lock_type_t type;
} lock;
struct {
- struct address_space *mapping; /* Pages being written from */
- pgoff_t first; /* first page in mapping to deal with */
- pgoff_t last; /* last page in mapping to deal with */
- unsigned first_offset; /* offset into mapping[first] */
- unsigned last_to; /* amount of mapping[last] */
- bool laundering; /* Laundering page, PG_writeback not set */
+ struct iov_iter *write_iter;
+ loff_t pos;
+ loff_t size;
+ loff_t i_size;
+ bool laundering; /* Laundering page, PG_writeback not set */
} store;
struct {
struct iattr *attr;
@@ -875,31 +882,31 @@ struct afs_vnode_cache_aux {
#define __AFS_PAGE_PRIV_MMAPPED 0x8000UL
#endif
-static inline unsigned int afs_page_dirty_resolution(void)
+static inline unsigned int afs_page_dirty_resolution(struct page *page)
{
- int shift = PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
+ int shift = thp_order(page) + PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
return (shift > 0) ? shift : 0;
}
-static inline size_t afs_page_dirty_from(unsigned long priv)
+static inline size_t afs_page_dirty_from(struct page *page, unsigned long priv)
{
unsigned long x = priv & __AFS_PAGE_PRIV_MASK;
/* The lower bound is inclusive */
- return x << afs_page_dirty_resolution();
+ return x << afs_page_dirty_resolution(page);
}
-static inline size_t afs_page_dirty_to(unsigned long priv)
+static inline size_t afs_page_dirty_to(struct page *page, unsigned long priv)
{
unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
/* The upper bound is immediately beyond the region */
- return (x + 1) << afs_page_dirty_resolution();
+ return (x + 1) << afs_page_dirty_resolution(page);
}
-static inline unsigned long afs_page_dirty(size_t from, size_t to)
+static inline unsigned long afs_page_dirty(struct page *page, size_t from, size_t to)
{
- unsigned int res = afs_page_dirty_resolution();
+ unsigned int res = afs_page_dirty_resolution(page);
from >>= res;
to = (to - 1) >> res;
return (to << __AFS_PAGE_PRIV_SHIFT) | from;
@@ -1040,13 +1047,14 @@ extern void afs_dynroot_depopulate(struct super_block *);
extern const struct address_space_operations afs_fs_aops;
extern const struct inode_operations afs_file_inode_operations;
extern const struct file_operations afs_file_operations;
+extern const struct netfs_read_request_ops afs_req_ops;
extern int afs_cache_wb_key(struct afs_vnode *, struct afs_file *);
extern void afs_put_wb_key(struct afs_wb_key *);
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
-extern int afs_fetch_data(struct afs_vnode *, struct key *, struct afs_read *);
-extern int afs_page_filler(void *, struct page *);
+extern int afs_fetch_data(struct afs_vnode *, struct afs_read *);
+extern struct afs_read *afs_alloc_read(gfp_t);
extern void afs_put_read(struct afs_read *);
static inline struct afs_read *afs_get_read(struct afs_read *req)
@@ -1270,6 +1278,7 @@ static inline void afs_make_op_call(struct afs_operation *op, struct afs_call *c
static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
{
+ call->iov_len = size;
call->kvec[0].iov_base = buf;
call->kvec[0].iov_len = size;
iov_iter_kvec(&call->def_iter, READ, call->kvec, 1, size);
@@ -1277,21 +1286,25 @@ static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t si
static inline void afs_extract_to_tmp(struct afs_call *call)
{
+ call->iov_len = sizeof(call->tmp);
afs_extract_begin(call, &call->tmp, sizeof(call->tmp));
}
static inline void afs_extract_to_tmp64(struct afs_call *call)
{
+ call->iov_len = sizeof(call->tmp64);
afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64));
}
static inline void afs_extract_discard(struct afs_call *call, size_t size)
{
+ call->iov_len = size;
iov_iter_discard(&call->def_iter, READ, size);
}
static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
{
+ call->iov_len = size;
afs_extract_begin(call, call->buffer, size);
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 8be709cb8542..23a1a92d64bb 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -271,40 +271,6 @@ void afs_flat_call_destructor(struct afs_call *call)
call->buffer = NULL;
}
-#define AFS_BVEC_MAX 8
-
-/*
- * Load the given bvec with the next few pages.
- */
-static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
- struct bio_vec *bv, pgoff_t first, pgoff_t last,
- unsigned offset)
-{
- struct afs_operation *op = call->op;
- struct page *pages[AFS_BVEC_MAX];
- unsigned int nr, n, i, to, bytes = 0;
-
- nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX);
- n = find_get_pages_contig(op->store.mapping, first, nr, pages);
- ASSERTCMP(n, ==, nr);
-
- msg->msg_flags |= MSG_MORE;
- for (i = 0; i < nr; i++) {
- to = PAGE_SIZE;
- if (first + i >= last) {
- to = op->store.last_to;
- msg->msg_flags &= ~MSG_MORE;
- }
- bv[i].bv_page = pages[i];
- bv[i].bv_len = to - offset;
- bv[i].bv_offset = offset;
- bytes += to - offset;
- offset = 0;
- }
-
- iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes);
-}
-
/*
* Advance the AFS call state when the RxRPC call ends the transmit phase.
*/
@@ -318,42 +284,6 @@ static void afs_notify_end_request_tx(struct sock *sock,
}
/*
- * attach the data from a bunch of pages on an inode to a call
- */
-static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
-{
- struct afs_operation *op = call->op;
- struct bio_vec bv[AFS_BVEC_MAX];
- unsigned int bytes, nr, loop, offset;
- pgoff_t first = op->store.first, last = op->store.last;
- int ret;
-
- offset = op->store.first_offset;
- op->store.first_offset = 0;
-
- do {
- afs_load_bvec(call, msg, bv, first, last, offset);
- trace_afs_send_pages(call, msg, first, last, offset);
-
- offset = 0;
- bytes = msg->msg_iter.count;
- nr = msg->msg_iter.nr_segs;
-
- ret = rxrpc_kernel_send_data(op->net->socket, call->rxcall, msg,
- bytes, afs_notify_end_request_tx);
- for (loop = 0; loop < nr; loop++)
- put_page(bv[loop].bv_page);
- if (ret < 0)
- break;
-
- first += nr;
- } while (first <= last);
-
- trace_afs_sent_pages(call, op->store.first, last, first, ret);
- return ret;
-}
-
-/*
* Initiate a call and synchronously queue up the parameters for dispatch. Any
* error is stored into the call struct, which the caller must check for.
*/
@@ -363,6 +293,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
struct rxrpc_call *rxcall;
struct msghdr msg;
struct kvec iov[1];
+ size_t len;
s64 tx_total_len;
int ret;
@@ -383,21 +314,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
* after the initial fixed part.
*/
tx_total_len = call->request_size;
- if (call->send_pages) {
- struct afs_operation *op = call->op;
-
- if (op->store.last == op->store.first) {
- tx_total_len += op->store.last_to - op->store.first_offset;
- } else {
- /* It looks mathematically like you should be able to
- * combine the following lines with the ones above, but
- * unsigned arithmetic is fun when it wraps...
- */
- tx_total_len += PAGE_SIZE - op->store.first_offset;
- tx_total_len += op->store.last_to;
- tx_total_len += (op->store.last - op->store.first - 1) * PAGE_SIZE;
- }
- }
+ if (call->write_iter)
+ tx_total_len += iov_iter_count(call->write_iter);
/* If the call is going to be asynchronous, we need an extra ref for
* the call to hold itself so the caller need not hang on to its ref.
@@ -439,7 +357,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size);
msg.msg_control = NULL;
msg.msg_controllen = 0;
- msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
+ msg.msg_flags = MSG_WAITALL | (call->write_iter ? MSG_MORE : 0);
ret = rxrpc_kernel_send_data(call->net->socket, rxcall,
&msg, call->request_size,
@@ -447,8 +365,18 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
if (ret < 0)
goto error_do_abort;
- if (call->send_pages) {
- ret = afs_send_pages(call, &msg);
+ if (call->write_iter) {
+ msg.msg_iter = *call->write_iter;
+ msg.msg_flags &= ~MSG_MORE;
+ trace_afs_send_data(call, &msg);
+
+ ret = rxrpc_kernel_send_data(call->net->socket,
+ call->rxcall, &msg,
+ iov_iter_count(&msg.msg_iter),
+ afs_notify_end_request_tx);
+ *call->write_iter = msg.msg_iter;
+
+ trace_afs_sent_data(call, &msg, ret);
if (ret < 0)
goto error_do_abort;
}
@@ -466,9 +394,10 @@ error_do_abort:
rxrpc_kernel_abort_call(call->net->socket, rxcall,
RX_USER_ABORT, ret, "KSD");
} else {
+ len = 0;
iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0);
rxrpc_kernel_recv_data(call->net->socket, rxcall,
- &msg.msg_iter, false,
+ &msg.msg_iter, &len, false,
&call->abort_code, &call->service_id);
ac->abort_code = call->abort_code;
ac->responded = true;
@@ -499,11 +428,45 @@ error_kill_call:
}
/*
+ * Log remote abort codes that indicate that we have a protocol disagreement
+ * with the server.
+ */
+static void afs_log_error(struct afs_call *call, s32 remote_abort)
+{
+ static int max = 0;
+ const char *msg;
+ int m;
+
+ switch (remote_abort) {
+ case RX_EOF: msg = "unexpected EOF"; break;
+ case RXGEN_CC_MARSHAL: msg = "client marshalling"; break;
+ case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling"; break;
+ case RXGEN_SS_MARSHAL: msg = "server marshalling"; break;
+ case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling"; break;
+ case RXGEN_DECODE: msg = "opcode decode"; break;
+ case RXGEN_SS_XDRFREE: msg = "server XDR cleanup"; break;
+ case RXGEN_CC_XDRFREE: msg = "client XDR cleanup"; break;
+ case -32: msg = "insufficient data"; break;
+ default:
+ return;
+ }
+
+ m = max;
+ if (m < 3) {
+ max = m + 1;
+ pr_notice("kAFS: Peer reported %s failure on %s [%pISp]\n",
+ msg, call->type->name,
+ &call->alist->addrs[call->addr_ix].transport);
+ }
+}
+
+/*
* deliver messages to a call
*/
static void afs_deliver_to_call(struct afs_call *call)
{
enum afs_call_state state;
+ size_t len;
u32 abort_code, remote_abort = 0;
int ret;
@@ -516,10 +479,11 @@ static void afs_deliver_to_call(struct afs_call *call)
state == AFS_CALL_SV_AWAIT_ACK
) {
if (state == AFS_CALL_SV_AWAIT_ACK) {
+ len = 0;
iov_iter_kvec(&call->def_iter, READ, NULL, 0, 0);
ret = rxrpc_kernel_recv_data(call->net->socket,
call->rxcall, &call->def_iter,
- false, &remote_abort,
+ &len, false, &remote_abort,
&call->service_id);
trace_afs_receive_data(call, &call->def_iter, false, ret);
@@ -559,6 +523,7 @@ static void afs_deliver_to_call(struct afs_call *call)
goto out;
case -ECONNABORTED:
ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
+ afs_log_error(call, call->abort_code);
goto done;
case -ENOTSUPP:
abort_code = RXGEN_OPCODE;
@@ -929,10 +894,11 @@ int afs_extract_data(struct afs_call *call, bool want_more)
u32 remote_abort = 0;
int ret;
- _enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more);
+ _enter("{%s,%zu,%zu},%d",
+ call->type->name, call->iov_len, iov_iter_count(iter), want_more);
ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
- want_more, &remote_abort,
+ &call->iov_len, want_more, &remote_abort,
&call->service_id);
if (ret == 0 || ret == -EAGAIN)
return ret;
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index dc9327332f06..00fca3c66ba6 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -593,6 +593,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
if (ret < 0)
return ret;
call->unmarshall = 6;
+ fallthrough;
case 6:
break;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index eb737ed63afb..3edb6204b937 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -11,6 +11,8 @@
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
+#include <linux/netfs.h>
+#include <linux/fscache.h>
#include "internal.h"
/*
@@ -23,55 +25,6 @@ int afs_set_page_dirty(struct page *page)
}
/*
- * partly or wholly fill a page that's under preparation for writing
- */
-static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
- loff_t pos, unsigned int len, struct page *page)
-{
- struct afs_read *req;
- size_t p;
- void *data;
- int ret;
-
- _enter(",,%llu", (unsigned long long)pos);
-
- if (pos >= vnode->vfs_inode.i_size) {
- p = pos & ~PAGE_MASK;
- ASSERTCMP(p + len, <=, PAGE_SIZE);
- data = kmap(page);
- memset(data + p, 0, len);
- kunmap(page);
- return 0;
- }
-
- req = kzalloc(struct_size(req, array, 1), GFP_KERNEL);
- if (!req)
- return -ENOMEM;
-
- refcount_set(&req->usage, 1);
- req->pos = pos;
- req->len = len;
- req->nr_pages = 1;
- req->pages = req->array;
- req->pages[0] = page;
- get_page(page);
-
- ret = afs_fetch_data(vnode, key, req);
- afs_put_read(req);
- if (ret < 0) {
- if (ret == -ENOENT) {
- _debug("got NOENT from server"
- " - marking file deleted and stale");
- set_bit(AFS_VNODE_DELETED, &vnode->flags);
- ret = -ESTALE;
- }
- }
-
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
* prepare to perform part of a write to a page
*/
int afs_write_begin(struct file *file, struct address_space *mapping,
@@ -80,47 +33,40 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct page *page;
- struct key *key = afs_file_key(file);
unsigned long priv;
- unsigned f, from = pos & (PAGE_SIZE - 1);
- unsigned t, to = from + len;
- pgoff_t index = pos >> PAGE_SHIFT;
+ unsigned f, from;
+ unsigned t, to;
+ pgoff_t index;
int ret;
- _enter("{%llx:%llu},{%lx},%u,%u",
- vnode->fid.vid, vnode->fid.vnode, index, from, to);
+ _enter("{%llx:%llu},%llx,%x",
+ vnode->fid.vid, vnode->fid.vnode, pos, len);
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page)
- return -ENOMEM;
+ /* Prefetch area to be written into the cache if we're caching this
+ * file. We need to do this before we get a lock on the page in case
+ * there's more than one writer competing for the same cache block.
+ */
+ ret = netfs_write_begin(file, mapping, pos, len, flags, &page, fsdata,
+ &afs_req_ops, NULL);
+ if (ret < 0)
+ return ret;
- if (!PageUptodate(page) && len != PAGE_SIZE) {
- ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page);
- if (ret < 0) {
- unlock_page(page);
- put_page(page);
- _leave(" = %d [prep]", ret);
- return ret;
- }
- SetPageUptodate(page);
- }
+ index = page->index;
+ from = pos - index * PAGE_SIZE;
+ to = from + len;
try_again:
/* See if this page is already partially written in a way that we can
* merge the new write with.
*/
- t = f = 0;
if (PagePrivate(page)) {
priv = page_private(page);
- f = afs_page_dirty_from(priv);
- t = afs_page_dirty_to(priv);
+ f = afs_page_dirty_from(page, priv);
+ t = afs_page_dirty_to(page, priv);
ASSERTCMP(f, <=, t);
- }
- if (f != t) {
if (PageWriteback(page)) {
- trace_afs_page_dirty(vnode, tracepoint_string("alrdy"),
- page->index, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("alrdy"), page);
goto flush_conflicting_write;
}
/* If the file is being filled locally, allow inter-write
@@ -164,12 +110,10 @@ int afs_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
- struct key *key = afs_file_key(file);
unsigned long priv;
- unsigned int f, from = pos & (PAGE_SIZE - 1);
+ unsigned int f, from = pos & (thp_size(page) - 1);
unsigned int t, to = from + copied;
loff_t i_size, maybe_i_size;
- int ret = 0;
_enter("{%llx:%llu},{%lx}",
vnode->fid.vid, vnode->fid.vnode, page->index);
@@ -188,88 +132,75 @@ int afs_write_end(struct file *file, struct address_space *mapping,
write_sequnlock(&vnode->cb_lock);
}
- if (!PageUptodate(page)) {
- if (copied < len) {
- /* Try and load any missing data from the server. The
- * unmarshalling routine will take care of clearing any
- * bits that are beyond the EOF.
- */
- ret = afs_fill_page(vnode, key, pos + copied,
- len - copied, page);
- if (ret < 0)
- goto out;
- }
- SetPageUptodate(page);
- }
+ ASSERT(PageUptodate(page));
if (PagePrivate(page)) {
priv = page_private(page);
- f = afs_page_dirty_from(priv);
- t = afs_page_dirty_to(priv);
+ f = afs_page_dirty_from(page, priv);
+ t = afs_page_dirty_to(page, priv);
if (from < f)
f = from;
if (to > t)
t = to;
- priv = afs_page_dirty(f, t);
+ priv = afs_page_dirty(page, f, t);
set_page_private(page, priv);
- trace_afs_page_dirty(vnode, tracepoint_string("dirty+"),
- page->index, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("dirty+"), page);
} else {
- priv = afs_page_dirty(from, to);
+ priv = afs_page_dirty(page, from, to);
attach_page_private(page, (void *)priv);
- trace_afs_page_dirty(vnode, tracepoint_string("dirty"),
- page->index, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("dirty"), page);
}
- set_page_dirty(page);
- if (PageDirty(page))
- _debug("dirtied");
- ret = copied;
+ if (set_page_dirty(page))
+ _debug("dirtied %lx", page->index);
out:
unlock_page(page);
put_page(page);
- return ret;
+ return copied;
}
/*
* kill all the pages in the given range
*/
static void afs_kill_pages(struct address_space *mapping,
- pgoff_t first, pgoff_t last)
+ loff_t start, loff_t len)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct pagevec pv;
- unsigned count, loop;
+ unsigned int loop, psize;
- _enter("{%llx:%llu},%lx-%lx",
- vnode->fid.vid, vnode->fid.vnode, first, last);
+ _enter("{%llx:%llu},%llx @%llx",
+ vnode->fid.vid, vnode->fid.vnode, len, start);
pagevec_init(&pv);
do {
- _debug("kill %lx-%lx", first, last);
+ _debug("kill %llx @%llx", len, start);
- count = last - first + 1;
- if (count > PAGEVEC_SIZE)
- count = PAGEVEC_SIZE;
- pv.nr = find_get_pages_contig(mapping, first, count, pv.pages);
- ASSERTCMP(pv.nr, ==, count);
+ pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
+ PAGEVEC_SIZE, pv.pages);
+ if (pv.nr == 0)
+ break;
- for (loop = 0; loop < count; loop++) {
+ for (loop = 0; loop < pv.nr; loop++) {
struct page *page = pv.pages[loop];
+
+ if (page->index * PAGE_SIZE >= start + len)
+ break;
+
+ psize = thp_size(page);
+ start += psize;
+ len -= psize;
ClearPageUptodate(page);
- SetPageError(page);
end_page_writeback(page);
- if (page->index >= first)
- first = page->index + 1;
lock_page(page);
generic_error_remove_page(mapping, page);
unlock_page(page);
}
__pagevec_release(&pv);
- } while (first <= last);
+ } while (len > 0);
_leave("");
}
@@ -279,37 +210,40 @@ static void afs_kill_pages(struct address_space *mapping,
*/
static void afs_redirty_pages(struct writeback_control *wbc,
struct address_space *mapping,
- pgoff_t first, pgoff_t last)
+ loff_t start, loff_t len)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct pagevec pv;
- unsigned count, loop;
+ unsigned int loop, psize;
- _enter("{%llx:%llu},%lx-%lx",
- vnode->fid.vid, vnode->fid.vnode, first, last);
+ _enter("{%llx:%llu},%llx @%llx",
+ vnode->fid.vid, vnode->fid.vnode, len, start);
pagevec_init(&pv);
do {
- _debug("redirty %lx-%lx", first, last);
+ _debug("redirty %llx @%llx", len, start);
- count = last - first + 1;
- if (count > PAGEVEC_SIZE)
- count = PAGEVEC_SIZE;
- pv.nr = find_get_pages_contig(mapping, first, count, pv.pages);
- ASSERTCMP(pv.nr, ==, count);
+ pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
+ PAGEVEC_SIZE, pv.pages);
+ if (pv.nr == 0)
+ break;
- for (loop = 0; loop < count; loop++) {
+ for (loop = 0; loop < pv.nr; loop++) {
struct page *page = pv.pages[loop];
+ if (page->index * PAGE_SIZE >= start + len)
+ break;
+
+ psize = thp_size(page);
+ start += psize;
+ len -= psize;
redirty_page_for_writepage(wbc, page);
end_page_writeback(page);
- if (page->index >= first)
- first = page->index + 1;
}
__pagevec_release(&pv);
- } while (first <= last);
+ } while (len > 0);
_leave("");
}
@@ -317,37 +251,32 @@ static void afs_redirty_pages(struct writeback_control *wbc,
/*
* completion of write to server
*/
-static void afs_pages_written_back(struct afs_vnode *vnode,
- pgoff_t first, pgoff_t last)
+static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
{
- struct pagevec pv;
- unsigned long priv;
- unsigned count, loop;
+ struct address_space *mapping = vnode->vfs_inode.i_mapping;
+ struct page *page;
+ pgoff_t end;
- _enter("{%llx:%llu},{%lx-%lx}",
- vnode->fid.vid, vnode->fid.vnode, first, last);
+ XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
- pagevec_init(&pv);
+ _enter("{%llx:%llu},{%x @%llx}",
+ vnode->fid.vid, vnode->fid.vnode, len, start);
- do {
- _debug("done %lx-%lx", first, last);
-
- count = last - first + 1;
- if (count > PAGEVEC_SIZE)
- count = PAGEVEC_SIZE;
- pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
- first, count, pv.pages);
- ASSERTCMP(pv.nr, ==, count);
-
- for (loop = 0; loop < count; loop++) {
- priv = (unsigned long)detach_page_private(pv.pages[loop]);
- trace_afs_page_dirty(vnode, tracepoint_string("clear"),
- pv.pages[loop]->index, priv);
- end_page_writeback(pv.pages[loop]);
+ rcu_read_lock();
+
+ end = (start + len - 1) / PAGE_SIZE;
+ xas_for_each(&xas, page, end) {
+ if (!PageWriteback(page)) {
+ kdebug("bad %x @%llx page %lx %lx", len, start, page->index, end);
+ ASSERT(PageWriteback(page));
}
- first += count;
- __pagevec_release(&pv);
- } while (first <= last);
+
+ trace_afs_page_dirty(vnode, tracepoint_string("clear"), page);
+ detach_page_private(page);
+ page_endio(page, true, 0);
+ }
+
+ rcu_read_unlock();
afs_prune_wb_keys(vnode);
_leave("");
@@ -402,11 +331,9 @@ static void afs_store_data_success(struct afs_operation *op)
afs_vnode_commit_status(op, &op->file[0]);
if (op->error == 0) {
if (!op->store.laundering)
- afs_pages_written_back(vnode, op->store.first, op->store.last);
+ afs_pages_written_back(vnode, op->store.pos, op->store.size);
afs_stat_v(vnode, n_stores);
- atomic_long_add((op->store.last * PAGE_SIZE + op->store.last_to) -
- (op->store.first * PAGE_SIZE + op->store.first_offset),
- &afs_v2net(vnode)->n_store_bytes);
+ atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
}
}
@@ -419,21 +346,20 @@ static const struct afs_operation_ops afs_store_data_operation = {
/*
* write to a file
*/
-static int afs_store_data(struct address_space *mapping,
- pgoff_t first, pgoff_t last,
- unsigned offset, unsigned to, bool laundering)
+static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
+ bool laundering)
{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct afs_operation *op;
struct afs_wb_key *wbk = NULL;
- int ret;
+ loff_t size = iov_iter_count(iter), i_size;
+ int ret = -ENOKEY;
- _enter("%s{%llx:%llu.%u},%lx,%lx,%x,%x",
+ _enter("%s{%llx:%llu.%u},%llx,%llx",
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
- first, last, offset, to);
+ size, pos);
ret = afs_get_writeback_key(vnode, &wbk);
if (ret) {
@@ -447,13 +373,15 @@ static int afs_store_data(struct address_space *mapping,
return -ENOMEM;
}
+ i_size = i_size_read(&vnode->vfs_inode);
+
afs_op_set_vnode(op, 0, vnode);
op->file[0].dv_delta = 1;
- op->store.mapping = mapping;
- op->store.first = first;
- op->store.last = last;
- op->store.first_offset = offset;
- op->store.last_to = to;
+ op->file[0].modification = true;
+ op->store.write_iter = iter;
+ op->store.pos = pos;
+ op->store.size = size;
+ op->store.i_size = max(pos + size, i_size);
op->store.laundering = laundering;
op->mtime = vnode->vfs_inode.i_mtime;
op->flags |= AFS_OPERATION_UNINTR;
@@ -487,73 +415,58 @@ try_next_key:
}
/*
- * Synchronously write back the locked page and any subsequent non-locked dirty
- * pages.
+ * Extend the region to be written back to include subsequent contiguously
+ * dirty pages if possible, but don't sleep while doing so.
+ *
+ * If this page holds new content, then we can include filler zeros in the
+ * writeback.
*/
-static int afs_write_back_from_locked_page(struct address_space *mapping,
- struct writeback_control *wbc,
- struct page *primary_page,
- pgoff_t final_page)
+static void afs_extend_writeback(struct address_space *mapping,
+ struct afs_vnode *vnode,
+ long *_count,
+ loff_t start,
+ loff_t max_len,
+ bool new_content,
+ unsigned int *_len)
{
- struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- struct page *pages[8], *page;
- unsigned long count, priv;
- unsigned n, offset, to, f, t;
- pgoff_t start, first, last;
- loff_t i_size, end;
- int loop, ret;
-
- _enter(",%lx", primary_page->index);
+ struct pagevec pvec;
+ struct page *page;
+ unsigned long priv;
+ unsigned int psize, filler = 0;
+ unsigned int f, t;
+ loff_t len = *_len;
+ pgoff_t index = (start + len) / PAGE_SIZE;
+ bool stop = true;
+ unsigned int i;
- count = 1;
- if (test_set_page_writeback(primary_page))
- BUG();
+ XA_STATE(xas, &mapping->i_pages, index);
+ pagevec_init(&pvec);
- /* Find all consecutive lockable dirty pages that have contiguous
- * written regions, stopping when we find a page that is not
- * immediately lockable, is not dirty or is missing, or we reach the
- * end of the range.
- */
- start = primary_page->index;
- priv = page_private(primary_page);
- offset = afs_page_dirty_from(priv);
- to = afs_page_dirty_to(priv);
- trace_afs_page_dirty(vnode, tracepoint_string("store"),
- primary_page->index, priv);
-
- WARN_ON(offset == to);
- if (offset == to)
- trace_afs_page_dirty(vnode, tracepoint_string("WARN"),
- primary_page->index, priv);
-
- if (start >= final_page ||
- (to < PAGE_SIZE && !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)))
- goto no_more;
-
- start++;
do {
- _debug("more %lx [%lx]", start, count);
- n = final_page - start + 1;
- if (n > ARRAY_SIZE(pages))
- n = ARRAY_SIZE(pages);
- n = find_get_pages_contig(mapping, start, ARRAY_SIZE(pages), pages);
- _debug("fgpc %u", n);
- if (n == 0)
- goto no_more;
- if (pages[0]->index != start) {
- do {
- put_page(pages[--n]);
- } while (n > 0);
- goto no_more;
- }
+ /* Firstly, we gather up a batch of contiguous dirty pages
+ * under the RCU read lock - but we can't clear the dirty flags
+ * there if any of those pages are mapped.
+ */
+ rcu_read_lock();
- for (loop = 0; loop < n; loop++) {
- page = pages[loop];
- if (to != PAGE_SIZE &&
- !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags))
+ xas_for_each(&xas, page, ULONG_MAX) {
+ stop = true;
+ if (xas_retry(&xas, page))
+ continue;
+ if (xa_is_value(page))
break;
- if (page->index > final_page)
+ if (page->index != index)
break;
+
+ if (!page_cache_get_speculative(page)) {
+ xas_reset(&xas);
+ continue;
+ }
+
+ /* Has the page moved or been split? */
+ if (unlikely(page != xas_reload(&xas)))
+ break;
+
if (!trylock_page(page))
break;
if (!PageDirty(page) || PageWriteback(page)) {
@@ -561,57 +474,134 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
break;
}
+ psize = thp_size(page);
priv = page_private(page);
- f = afs_page_dirty_from(priv);
- t = afs_page_dirty_to(priv);
- if (f != 0 &&
- !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) {
+ f = afs_page_dirty_from(page, priv);
+ t = afs_page_dirty_to(page, priv);
+ if (f != 0 && !new_content) {
unlock_page(page);
break;
}
- to = t;
- trace_afs_page_dirty(vnode, tracepoint_string("store+"),
- page->index, priv);
+ len += filler + t;
+ filler = psize - t;
+ if (len >= max_len || *_count <= 0)
+ stop = true;
+ else if (t == psize || new_content)
+ stop = false;
+
+ index += thp_nr_pages(page);
+ if (!pagevec_add(&pvec, page))
+ break;
+ if (stop)
+ break;
+ }
+
+ if (!stop)
+ xas_pause(&xas);
+ rcu_read_unlock();
+
+ /* Now, if we obtained any pages, we can shift them to being
+ * writable and mark them for caching.
+ */
+ if (!pagevec_count(&pvec))
+ break;
+
+ for (i = 0; i < pagevec_count(&pvec); i++) {
+ page = pvec.pages[i];
+ trace_afs_page_dirty(vnode, tracepoint_string("store+"), page);
if (!clear_page_dirty_for_io(page))
BUG();
if (test_set_page_writeback(page))
BUG();
+
+ *_count -= thp_nr_pages(page);
unlock_page(page);
- put_page(page);
- }
- count += loop;
- if (loop < n) {
- for (; loop < n; loop++)
- put_page(pages[loop]);
- goto no_more;
}
- start += loop;
- } while (start <= final_page && count < 65536);
+ pagevec_release(&pvec);
+ cond_resched();
+ } while (!stop);
+
+ *_len = len;
+}
+
+/*
+ * Synchronously write back the locked page and any subsequent non-locked dirty
+ * pages.
+ */
+static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct page *page,
+ loff_t start, loff_t end)
+{
+ struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ struct iov_iter iter;
+ unsigned long priv;
+ unsigned int offset, to, len, max_len;
+ loff_t i_size = i_size_read(&vnode->vfs_inode);
+ bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+ long count = wbc->nr_to_write;
+ int ret;
+
+ _enter(",%lx,%llx-%llx", page->index, start, end);
+
+ if (test_set_page_writeback(page))
+ BUG();
+
+ count -= thp_nr_pages(page);
+
+ /* Find all consecutive lockable dirty pages that have contiguous
+ * written regions, stopping when we find a page that is not
+ * immediately lockable, is not dirty or is missing, or we reach the
+ * end of the range.
+ */
+ priv = page_private(page);
+ offset = afs_page_dirty_from(page, priv);
+ to = afs_page_dirty_to(page, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("store"), page);
+
+ len = to - offset;
+ start += offset;
+ if (start < i_size) {
+ /* Trim the write to the EOF; the extra data is ignored. Also
+ * put an upper limit on the size of a single storedata op.
+ */
+ max_len = 65536 * 4096;
+ max_len = min_t(unsigned long long, max_len, end - start + 1);
+ max_len = min_t(unsigned long long, max_len, i_size - start);
+
+ if (len < max_len &&
+ (to == thp_size(page) || new_content))
+ afs_extend_writeback(mapping, vnode, &count,
+ start, max_len, new_content, &len);
+ len = min_t(loff_t, len, max_len);
+ }
-no_more:
/* We now have a contiguous set of dirty pages, each with writeback
* set; the first page is still locked at this point, but all the rest
* have been unlocked.
*/
- unlock_page(primary_page);
+ unlock_page(page);
- first = primary_page->index;
- last = first + count - 1;
+ if (start < i_size) {
+ _debug("write back %x @%llx [%llx]", len, start, i_size);
- end = (loff_t)last * PAGE_SIZE + to;
- i_size = i_size_read(&vnode->vfs_inode);
+ iov_iter_xarray(&iter, WRITE, &mapping->i_pages, start, len);
+ ret = afs_store_data(vnode, &iter, start, false);
+ } else {
+ _debug("write discard %x @%llx [%llx]", len, start, i_size);
- _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
- if (end > i_size)
- to = i_size & ~PAGE_MASK;
+ /* The dirty region was entirely beyond the EOF. */
+ afs_pages_written_back(vnode, start, len);
+ ret = 0;
+ }
- ret = afs_store_data(mapping, first, last, offset, to, false);
switch (ret) {
case 0:
- ret = count;
+ wbc->nr_to_write = count;
+ ret = len;
break;
default:
@@ -623,13 +613,13 @@ no_more:
case -EKEYEXPIRED:
case -EKEYREJECTED:
case -EKEYREVOKED:
- afs_redirty_pages(wbc, mapping, first, last);
+ afs_redirty_pages(wbc, mapping, start, len);
mapping_set_error(mapping, ret);
break;
case -EDQUOT:
case -ENOSPC:
- afs_redirty_pages(wbc, mapping, first, last);
+ afs_redirty_pages(wbc, mapping, start, len);
mapping_set_error(mapping, -ENOSPC);
break;
@@ -641,7 +631,7 @@ no_more:
case -ENOMEDIUM:
case -ENXIO:
trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
- afs_kill_pages(mapping, first, last);
+ afs_kill_pages(mapping, start, len);
mapping_set_error(mapping, ret);
break;
}
@@ -656,19 +646,19 @@ no_more:
*/
int afs_writepage(struct page *page, struct writeback_control *wbc)
{
- int ret;
+ ssize_t ret;
+ loff_t start;
_enter("{%lx},", page->index);
+ start = page->index * PAGE_SIZE;
ret = afs_write_back_from_locked_page(page->mapping, wbc, page,
- wbc->range_end >> PAGE_SHIFT);
+ start, LLONG_MAX - start);
if (ret < 0) {
- _leave(" = %d", ret);
- return 0;
+ _leave(" = %zd", ret);
+ return ret;
}
- wbc->nr_to_write -= ret;
-
_leave(" = 0");
return 0;
}
@@ -678,35 +668,46 @@ int afs_writepage(struct page *page, struct writeback_control *wbc)
*/
static int afs_writepages_region(struct address_space *mapping,
struct writeback_control *wbc,
- pgoff_t index, pgoff_t end, pgoff_t *_next)
+ loff_t start, loff_t end, loff_t *_next)
{
struct page *page;
- int ret, n;
+ ssize_t ret;
+ int n;
- _enter(",,%lx,%lx,", index, end);
+ _enter("%llx,%llx,", start, end);
do {
- n = find_get_pages_range_tag(mapping, &index, end,
- PAGECACHE_TAG_DIRTY, 1, &page);
+ pgoff_t index = start / PAGE_SIZE;
+
+ n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
+ PAGECACHE_TAG_DIRTY, 1, &page);
if (!n)
break;
+ start = (loff_t)page->index * PAGE_SIZE; /* May regress with THPs */
+
_debug("wback %lx", page->index);
- /*
- * at this point we hold neither the i_pages lock nor the
+ /* At this point we hold neither the i_pages lock nor the
* page lock: the page may be truncated or invalidated
* (changing page->mapping to NULL), or even swizzled
* back from swapper_space to tmpfs file mapping
*/
- ret = lock_page_killable(page);
- if (ret < 0) {
- put_page(page);
- _leave(" = %d", ret);
- return ret;
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ ret = lock_page_killable(page);
+ if (ret < 0) {
+ put_page(page);
+ return ret;
+ }
+ } else {
+ if (!trylock_page(page)) {
+ put_page(page);
+ return 0;
+ }
}
if (page->mapping != mapping || !PageDirty(page)) {
+ start += thp_size(page);
unlock_page(page);
put_page(page);
continue;
@@ -722,20 +723,20 @@ static int afs_writepages_region(struct address_space *mapping,
if (!clear_page_dirty_for_io(page))
BUG();
- ret = afs_write_back_from_locked_page(mapping, wbc, page, end);
+ ret = afs_write_back_from_locked_page(mapping, wbc, page, start, end);
put_page(page);
if (ret < 0) {
- _leave(" = %d", ret);
+ _leave(" = %zd", ret);
return ret;
}
- wbc->nr_to_write -= ret;
+ start += ret * PAGE_SIZE;
cond_resched();
- } while (index < end && wbc->nr_to_write > 0);
+ } while (wbc->nr_to_write > 0);
- *_next = index;
- _leave(" = 0 [%lx]", *_next);
+ *_next = start;
+ _leave(" = 0 [%llx]", *_next);
return 0;
}
@@ -746,7 +747,7 @@ int afs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
- pgoff_t start, end, next;
+ loff_t start, next;
int ret;
_enter("");
@@ -761,22 +762,19 @@ int afs_writepages(struct address_space *mapping,
return 0;
if (wbc->range_cyclic) {
- start = mapping->writeback_index;
- end = -1;
- ret = afs_writepages_region(mapping, wbc, start, end, &next);
+ start = mapping->writeback_index * PAGE_SIZE;
+ ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
ret = afs_writepages_region(mapping, wbc, 0, start,
&next);
- mapping->writeback_index = next;
+ mapping->writeback_index = next / PAGE_SIZE;
} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
- end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT);
- ret = afs_writepages_region(mapping, wbc, 0, end, &next);
+ ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
if (wbc->nr_to_write > 0)
mapping->writeback_index = next;
} else {
- start = wbc->range_start >> PAGE_SHIFT;
- end = wbc->range_end >> PAGE_SHIFT;
- ret = afs_writepages_region(mapping, wbc, start, end, &next);
+ ret = afs_writepages_region(mapping, wbc,
+ wbc->range_start, wbc->range_end, &next);
}
up_read(&vnode->validate_lock);
@@ -834,13 +832,13 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
*/
vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
{
+ struct page *page = thp_head(vmf->page);
struct file *file = vmf->vma->vm_file;
struct inode *inode = file_inode(file);
struct afs_vnode *vnode = AFS_FS_I(inode);
unsigned long priv;
- _enter("{{%llx:%llu}},{%lx}",
- vnode->fid.vid, vnode->fid.vnode, vmf->page->index);
+ _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
sb_start_pagefault(inode->i_sb);
@@ -848,29 +846,35 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
* be modified. We then assume the entire page will need writing back.
*/
#ifdef CONFIG_AFS_FSCACHE
- fscache_wait_on_page_write(vnode->cache, vmf->page);
+ if (PageFsCache(page) &&
+ wait_on_page_fscache_killable(page) < 0)
+ return VM_FAULT_RETRY;
#endif
- if (wait_on_page_writeback_killable(vmf->page))
+ if (wait_on_page_writeback_killable(page))
return VM_FAULT_RETRY;
- if (lock_page_killable(vmf->page) < 0)
+ if (lock_page_killable(page) < 0)
return VM_FAULT_RETRY;
/* We mustn't change page->private until writeback is complete as that
* details the portion of the page we need to write back and we might
* need to redirty the page if there's a problem.
*/
- wait_on_page_writeback(vmf->page);
+ if (wait_on_page_writeback_killable(page) < 0) {
+ unlock_page(page);
+ return VM_FAULT_RETRY;
+ }
- priv = afs_page_dirty(0, PAGE_SIZE);
+ priv = afs_page_dirty(page, 0, thp_size(page));
priv = afs_page_dirty_mmapped(priv);
- trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
- vmf->page->index, priv);
- if (PagePrivate(vmf->page))
- set_page_private(vmf->page, priv);
- else
- attach_page_private(vmf->page, (void *)priv);
+ if (PagePrivate(page)) {
+ set_page_private(page, priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("mkwrite+"), page);
+ } else {
+ attach_page_private(page, (void *)priv);
+ trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), page);
+ }
file_update_time(file);
sb_end_pagefault(inode->i_sb);
@@ -912,6 +916,8 @@ int afs_launder_page(struct page *page)
{
struct address_space *mapping = page->mapping;
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+ struct iov_iter iter;
+ struct bio_vec bv[1];
unsigned long priv;
unsigned int f, t;
int ret = 0;
@@ -921,26 +927,24 @@ int afs_launder_page(struct page *page)
priv = page_private(page);
if (clear_page_dirty_for_io(page)) {
f = 0;
- t = PAGE_SIZE;
+ t = thp_size(page);
if (PagePrivate(page)) {
- f = afs_page_dirty_from(priv);
- t = afs_page_dirty_to(priv);
+ f = afs_page_dirty_from(page, priv);
+ t = afs_page_dirty_to(page, priv);
}
- trace_afs_page_dirty(vnode, tracepoint_string("launder"),
- page->index, priv);
- ret = afs_store_data(mapping, page->index, page->index, t, f, true);
- }
-
- priv = (unsigned long)detach_page_private(page);
- trace_afs_page_dirty(vnode, tracepoint_string("laundered"),
- page->index, priv);
+ bv[0].bv_page = page;
+ bv[0].bv_offset = f;
+ bv[0].bv_len = t - f;
+ iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len);
-#ifdef CONFIG_AFS_FSCACHE
- if (PageFsCache(page)) {
- fscache_wait_on_page_write(vnode->cache, page);
- fscache_uncache_page(vnode->cache, page);
+ trace_afs_page_dirty(vnode, tracepoint_string("launder"), page);
+ ret = afs_store_data(vnode, &iter, (loff_t)page->index * PAGE_SIZE,
+ true);
}
-#endif
+
+ trace_afs_page_dirty(vnode, tracepoint_string("laundered"), page);
+ detach_page_private(page);
+ wait_on_page_fscache(page);
return ret;
}
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index bd787e71a657..2b35cba8ad62 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -360,22 +360,23 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
struct afs_vnode_param *vp = &op->file[0];
struct afs_read *req = op->fetch.req;
const __be32 *bp;
- unsigned int size;
int ret;
- _enter("{%u,%zu/%llu}",
- call->unmarshall, iov_iter_count(call->iter), req->actual_len);
+ _enter("{%u,%zu, %zu/%llu}",
+ call->unmarshall, call->iov_len, iov_iter_count(call->iter),
+ req->actual_len);
switch (call->unmarshall) {
case 0:
req->actual_len = 0;
- req->index = 0;
- req->offset = req->pos & (PAGE_SIZE - 1);
afs_extract_to_tmp64(call);
call->unmarshall++;
fallthrough;
- /* extract the returned data length */
+ /* Extract the returned data length into ->actual_len. This
+ * may indicate more or less data than was requested will be
+ * returned.
+ */
case 1:
_debug("extract data length");
ret = afs_extract_data(call, true);
@@ -384,44 +385,25 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
req->actual_len = be64_to_cpu(call->tmp64);
_debug("DATA length: %llu", req->actual_len);
- req->remain = min(req->len, req->actual_len);
- if (req->remain == 0)
+
+ if (req->actual_len == 0)
goto no_more_data;
+ call->iter = req->iter;
+ call->iov_len = min(req->actual_len, req->len);
call->unmarshall++;
-
- begin_page:
- ASSERTCMP(req->index, <, req->nr_pages);
- if (req->remain > PAGE_SIZE - req->offset)
- size = PAGE_SIZE - req->offset;
- else
- size = req->remain;
- call->bvec[0].bv_len = size;
- call->bvec[0].bv_offset = req->offset;
- call->bvec[0].bv_page = req->pages[req->index];
- iov_iter_bvec(&call->def_iter, READ, call->bvec, 1, size);
- ASSERTCMP(size, <=, PAGE_SIZE);
fallthrough;
/* extract the returned data */
case 2:
_debug("extract data %zu/%llu",
- iov_iter_count(call->iter), req->remain);
+ iov_iter_count(call->iter), req->actual_len);
ret = afs_extract_data(call, true);
if (ret < 0)
return ret;
- req->remain -= call->bvec[0].bv_len;
- req->offset += call->bvec[0].bv_len;
- ASSERTCMP(req->offset, <=, PAGE_SIZE);
- if (req->offset == PAGE_SIZE) {
- req->offset = 0;
- req->index++;
- if (req->remain > 0)
- goto begin_page;
- }
- ASSERTCMP(req->remain, ==, 0);
+ call->iter = &call->def_iter;
if (req->actual_len <= req->len)
goto no_more_data;
@@ -467,17 +449,6 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
break;
}
- for (; req->index < req->nr_pages; req->index++) {
- if (req->offset < PAGE_SIZE)
- zero_user_segment(req->pages[req->index],
- req->offset, PAGE_SIZE);
- req->offset = 0;
- }
-
- if (req->page_done)
- for (req->index = 0; req->index < req->nr_pages; req->index++)
- req->page_done(req);
-
_leave(" = 0 [done]");
return 0;
}
@@ -516,6 +487,8 @@ void yfs_fs_fetch_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
+ req->call_debug_id = call->debug_id;
+
/* marshall the parameters */
bp = call->request;
bp = xdr_encode_u32(bp, YFSFETCHDATA64);
@@ -1102,25 +1075,15 @@ void yfs_fs_store_data(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
struct afs_call *call;
- loff_t size, pos, i_size;
__be32 *bp;
_enter(",%x,{%llx:%llu},,",
key_serial(op->key), vp->fid.vid, vp->fid.vnode);
- size = (loff_t)op->store.last_to - (loff_t)op->store.first_offset;
- if (op->store.first != op->store.last)
- size += (loff_t)(op->store.last - op->store.first) << PAGE_SHIFT;
- pos = (loff_t)op->store.first << PAGE_SHIFT;
- pos += op->store.first_offset;
-
- i_size = i_size_read(&vp->vnode->vfs_inode);
- if (pos + size > i_size)
- i_size = size + pos;
-
_debug("size %llx, at %llx, i_size %llx",
- (unsigned long long)size, (unsigned long long)pos,
- (unsigned long long)i_size);
+ (unsigned long long)op->store.size,
+ (unsigned long long)op->store.pos,
+ (unsigned long long)op->store.i_size);
call = afs_alloc_flat_call(op->net, &yfs_RXYFSStoreData64,
sizeof(__be32) +
@@ -1133,8 +1096,7 @@ void yfs_fs_store_data(struct afs_operation *op)
if (!call)
return afs_op_nomem(op);
- call->key = op->key;
- call->send_pages = true;
+ call->write_iter = op->store.write_iter;
/* marshall the parameters */
bp = call->request;
@@ -1142,9 +1104,9 @@ void yfs_fs_store_data(struct afs_operation *op)
bp = xdr_encode_u32(bp, 0); /* RPC flags */
bp = xdr_encode_YFSFid(bp, &vp->fid);
bp = xdr_encode_YFSStoreStatus_mtime(bp, &op->mtime);
- bp = xdr_encode_u64(bp, pos);
- bp = xdr_encode_u64(bp, size);
- bp = xdr_encode_u64(bp, i_size);
+ bp = xdr_encode_u64(bp, op->store.pos);
+ bp = xdr_encode_u64(bp, op->store.size);
+ bp = xdr_encode_u64(bp, op->store.i_size);
yfs_check_req(call, bp);
trace_afs_make_fs_call(call, &vp->fid);
diff --git a/fs/aio.c b/fs/aio.c
index 1f32da13d39e..76ce0cc3ee4e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -323,16 +323,13 @@ static void aio_free_ring(struct kioctx *ctx)
}
}
-static int aio_ring_mremap(struct vm_area_struct *vma, unsigned long flags)
+static int aio_ring_mremap(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
struct kioctx_table *table;
int i, res = -EINVAL;
- if (flags & MREMAP_DONTUNMAP)
- return -EINVAL;
-
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
table = rcu_dereference(mm->ioctx_table);
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 054f97b07754..918826eaceea 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -87,6 +87,7 @@ struct autofs_wait_queue {
autofs_wqt_t wait_queue_token;
/* We use the following to see what we are waiting for */
struct qstr name;
+ u32 offset;
u32 dev;
u64 ino;
kuid_t uid;
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index a1c7701007e7..b3fefd6237c3 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -355,7 +355,7 @@ static struct dentry *should_expire(struct dentry *dentry,
return NULL;
}
- if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
+ if (d_is_symlink(dentry)) {
pr_debug("checking symlink %p %pd\n", dentry, dentry);
/* Forced expire, user space handles busy mounts */
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 5ced859dac53..16b5fca0626e 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -30,7 +30,7 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
while (wq) {
nwq = wq->next;
wq->status = -ENOENT; /* Magic is gone - report failure */
- kfree(wq->name.name);
+ kfree(wq->name.name - wq->offset);
wq->name.name = NULL;
wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
@@ -175,51 +175,6 @@ static void autofs_notify_daemon(struct autofs_sb_info *sbi,
fput(pipe);
}
-static int autofs_getpath(struct autofs_sb_info *sbi,
- struct dentry *dentry, char *name)
-{
- struct dentry *root = sbi->sb->s_root;
- struct dentry *tmp;
- char *buf;
- char *p;
- int len;
- unsigned seq;
-
-rename_retry:
- buf = name;
- len = 0;
-
- seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
- spin_lock(&sbi->fs_lock);
- for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
- len += tmp->d_name.len + 1;
-
- if (!len || --len > NAME_MAX) {
- spin_unlock(&sbi->fs_lock);
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
- return 0;
- }
-
- *(buf + len) = '\0';
- p = buf + len - dentry->d_name.len;
- strncpy(p, dentry->d_name.name, dentry->d_name.len);
-
- for (tmp = dentry->d_parent; tmp != root ; tmp = tmp->d_parent) {
- *(--p) = '/';
- p -= tmp->d_name.len;
- strncpy(p, tmp->d_name.name, tmp->d_name.len);
- }
- spin_unlock(&sbi->fs_lock);
- rcu_read_unlock();
- if (read_seqretry(&rename_lock, seq))
- goto rename_retry;
-
- return len;
-}
-
static struct autofs_wait_queue *
autofs_find_wait(struct autofs_sb_info *sbi, const struct qstr *qstr)
{
@@ -352,6 +307,7 @@ int autofs_wait(struct autofs_sb_info *sbi,
struct qstr qstr;
char *name;
int status, ret, type;
+ unsigned int offset = 0;
pid_t pid;
pid_t tgid;
@@ -389,20 +345,23 @@ int autofs_wait(struct autofs_sb_info *sbi,
return -ENOMEM;
/* If this is a direct mount request create a dummy name */
- if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type))
+ if (IS_ROOT(dentry) && autofs_type_trigger(sbi->type)) {
+ qstr.name = name;
qstr.len = sprintf(name, "%p", dentry);
- else {
- qstr.len = autofs_getpath(sbi, dentry, name);
- if (!qstr.len) {
+ } else {
+ char *p = dentry_path_raw(dentry, name, NAME_MAX);
+ if (IS_ERR(p)) {
kfree(name);
return -ENOENT;
}
+ qstr.name = ++p; // skip the leading slash
+ qstr.len = strlen(p);
+ offset = p - name;
}
- qstr.name = name;
qstr.hash = full_name_hash(dentry, name, qstr.len);
if (mutex_lock_interruptible(&sbi->wq_mutex)) {
- kfree(qstr.name);
+ kfree(name);
return -EINTR;
}
@@ -410,7 +369,7 @@ int autofs_wait(struct autofs_sb_info *sbi,
if (ret <= 0) {
if (ret != -EINTR)
mutex_unlock(&sbi->wq_mutex);
- kfree(qstr.name);
+ kfree(name);
return ret;
}
@@ -418,7 +377,7 @@ int autofs_wait(struct autofs_sb_info *sbi,
/* Create a new wait queue */
wq = kmalloc(sizeof(struct autofs_wait_queue), GFP_KERNEL);
if (!wq) {
- kfree(qstr.name);
+ kfree(name);
mutex_unlock(&sbi->wq_mutex);
return -ENOMEM;
}
@@ -430,6 +389,7 @@ int autofs_wait(struct autofs_sb_info *sbi,
sbi->queues = wq;
init_waitqueue_head(&wq->queue);
memcpy(&wq->name, &qstr, sizeof(struct qstr));
+ wq->offset = offset;
wq->dev = autofs_get_dev(sbi);
wq->ino = autofs_get_ino(sbi);
wq->uid = current_uid();
@@ -469,7 +429,7 @@ int autofs_wait(struct autofs_sb_info *sbi,
(unsigned long) wq->wait_queue_token, wq->name.len,
wq->name.name, notify);
mutex_unlock(&sbi->wq_mutex);
- kfree(qstr.name);
+ kfree(name);
}
/*
@@ -540,7 +500,7 @@ int autofs_wait_release(struct autofs_sb_info *sbi,
}
*wql = wq->next; /* Unlink from chain */
- kfree(wq->name.name);
+ kfree(wq->name.name - wq->offset);
wq->name.name = NULL; /* Do not wait on this queue */
wq->status = status;
wake_up(&wq->queue);
diff --git a/fs/befs/TODO b/fs/befs/TODO
deleted file mode 100644
index 3250921aa2e6..000000000000
--- a/fs/befs/TODO
+++ /dev/null
@@ -1,14 +0,0 @@
-TODO
-==========
-
-* Convert comments to the Kernel-Doc format.
-
-* Befs_fs.h has gotten big and messy. No reason not to break it up into
- smaller peices.
-
-* See if Alexander Viro's option parser made it into the kernel tree.
- Use that if we can. (include/linux/parser.h)
-
-* See if we really need separate types for on-disk and in-memory
- representations of the superblock and inode.
-
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b12ba98ae9f5..187b3f2b9202 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2267,8 +2267,7 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
/* Align to page */
- if (!dump_skip(cprm, dataoff - cprm->pos))
- goto end_coredump;
+ dump_skip_to(cprm, dataoff);
for (i = 0; i < vma_count; i++) {
struct core_vma_metadata *meta = vma_meta + i;
@@ -2276,7 +2275,6 @@ static int elf_core_dump(struct coredump_params *cprm)
if (!dump_user_range(cprm, meta->start, meta->dump_size))
goto end_coredump;
}
- dump_truncate(cprm);
if (!elf_core_write_extra_data(cprm))
goto end_coredump;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 3cfd6cd46f26..2c99b102c860 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1631,8 +1631,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
goto end_coredump;
}
- if (!dump_skip(cprm, dataoff - cprm->pos))
- goto end_coredump;
+ dump_skip_to(cprm, dataoff);
if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count))
goto end_coredump;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b9c658e0548e..a1072c6a2341 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -74,6 +74,12 @@
#define MAX_SHARED_LIBS (1)
#endif
+#ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET
+#define DATA_START_OFFSET_WORDS (0)
+#else
+#define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS)
+#endif
+
struct lib_info {
struct {
unsigned long start_code; /* Start of text segment */
@@ -576,7 +582,8 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
- len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
+ len = data_len + extra +
+ DATA_START_OFFSET_WORDS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
realdatastart = vm_mmap(NULL, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
@@ -591,7 +598,7 @@ static int load_flat_file(struct linux_binprm *bprm,
goto err;
}
datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(unsigned long),
+ DATA_START_OFFSET_WORDS * sizeof(unsigned long),
FLAT_DATA_ALIGN);
pr_debug("Allocated data+bss+stack (%u bytes): %lx\n",
@@ -622,7 +629,8 @@ static int load_flat_file(struct linux_binprm *bprm,
memp_size = len;
} else {
- len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
+ len = text_len + data_len + extra +
+ DATA_START_OFFSET_WORDS * sizeof(u32);
len = PAGE_ALIGN(len);
textpos = vm_mmap(NULL, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
@@ -638,7 +646,7 @@ static int load_flat_file(struct linux_binprm *bprm,
realdatastart = textpos + ntohl(hdr->data_start);
datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(u32),
+ DATA_START_OFFSET_WORDS * sizeof(u32),
FLAT_DATA_ALIGN);
reloc = (__be32 __user *)
@@ -714,7 +722,7 @@ static int load_flat_file(struct linux_binprm *bprm,
ret = result;
pr_err("Unable to read code+data+bss, errno %d\n", ret);
vm_munmap(textpos, text_len + data_len + extra +
- MAX_SHARED_LIBS * sizeof(u32));
+ DATA_START_OFFSET_WORDS * sizeof(u32));
goto err;
}
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 09d6f7229db9..6cc4d4cfe0c2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -79,7 +79,7 @@ static void kill_bdev(struct block_device *bdev)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
+ if (mapping_empty(mapping))
return;
invalidate_bh_lrus();
@@ -1240,14 +1240,19 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
int bdev_disk_changed(struct block_device *bdev, bool invalidate)
{
struct gendisk *disk = bdev->bd_disk;
- int ret;
+ int ret = 0;
lockdep_assert_held(&bdev->bd_mutex);
+ if (!(disk->flags & GENHD_FL_UP))
+ return -ENXIO;
+
rescan:
- ret = blk_drop_partitions(bdev);
- if (ret)
- return ret;
+ if (bdev->bd_part_count)
+ return -EBUSY;
+ sync_blockdev(bdev);
+ invalidate_bdev(bdev);
+ blk_drop_partitions(disk);
clear_bit(GD_NEED_PART_SCAN, &disk->state);
@@ -1263,9 +1268,6 @@ rescan:
if (disk_part_scan_enabled(disk) ||
!(disk->flags & GENHD_FL_REMOVABLE))
set_capacity(disk, 0);
- } else {
- if (disk->fops->revalidate_disk)
- disk->fops->revalidate_disk(disk);
}
if (get_capacity(disk)) {
@@ -1299,6 +1301,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
struct gendisk *disk = bdev->bd_disk;
int ret = 0;
+ if (!(disk->flags & GENHD_FL_UP))
+ return -ENXIO;
+
if (!bdev->bd_openers) {
if (!bdev_is_partition(bdev)) {
ret = 0;
@@ -1333,8 +1338,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
whole->bd_part_count++;
mutex_unlock(&whole->bd_mutex);
- if (!(disk->flags & GENHD_FL_UP) ||
- !bdev_nr_sectors(bdev)) {
+ if (!bdev_nr_sectors(bdev)) {
__blkdev_put(whole, mode, 1);
bdput(whole);
return -ENXIO;
@@ -1365,16 +1369,12 @@ struct block_device *blkdev_get_no_open(dev_t dev)
struct block_device *bdev;
struct gendisk *disk;
- down_read(&bdev_lookup_sem);
bdev = bdget(dev);
if (!bdev) {
- up_read(&bdev_lookup_sem);
blk_request_module(dev);
- down_read(&bdev_lookup_sem);
-
bdev = bdget(dev);
if (!bdev)
- goto unlock;
+ return NULL;
}
disk = bdev->bd_disk;
@@ -1384,14 +1384,11 @@ struct block_device *blkdev_get_no_open(dev_t dev)
goto put_disk;
if (!try_module_get(bdev->bd_disk->fops->owner))
goto put_disk;
- up_read(&bdev_lookup_sem);
return bdev;
put_disk:
put_disk(disk);
bdput:
bdput(bdev);
-unlock:
- up_read(&bdev_lookup_sem);
return NULL;
}
@@ -1437,10 +1434,6 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
if (ret)
return ERR_PTR(ret);
- /*
- * If we lost a race with 'disk' being deleted, try again. See md.c.
- */
-retry:
bdev = blkdev_get_no_open(dev);
if (!bdev)
return ERR_PTR(-ENXIO);
@@ -1487,8 +1480,6 @@ abort_claiming:
disk_unblock_events(disk);
put_blkdev:
blkdev_put_no_open(bdev);
- if (ret == -ERESTARTSYS)
- goto retry;
return ERR_PTR(ret);
}
EXPORT_SYMBOL(blkdev_get_by_dev);
@@ -1684,6 +1675,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
struct blk_plug plug;
+ size_t shorted = 0;
ssize_t ret;
if (bdev_read_only(I_BDEV(bd_inode)))
@@ -1701,12 +1693,17 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
return -EOPNOTSUPP;
- iov_iter_truncate(from, size - iocb->ki_pos);
+ size -= iocb->ki_pos;
+ if (iov_iter_count(from) > size) {
+ shorted = iov_iter_count(from) - size;
+ iov_iter_truncate(from, size);
+ }
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
+ iov_iter_reexpand(from, iov_iter_count(from) + shorted);
blk_finish_plug(&plug);
return ret;
}
@@ -1718,13 +1715,21 @@ ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
loff_t pos = iocb->ki_pos;
+ size_t shorted = 0;
+ ssize_t ret;
if (pos >= size)
return 0;
size -= pos;
- iov_iter_truncate(to, size);
- return generic_file_read_iter(iocb, to);
+ if (iov_iter_count(to) > size) {
+ shorted = iov_iter_count(to) - size;
+ iov_iter_truncate(to, size);
+ }
+
+ ret = generic_file_read_iter(iocb, to);
+ iov_iter_reexpand(to, iov_iter_count(to) + shorted);
+ return ret;
}
EXPORT_SYMBOL_GPL(blkdev_read_iter);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 5cb4f3b88285..1346d698463a 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -632,16 +632,13 @@ static noinline int add_ra_bio_pages(struct inode *inode,
free_extent_map(em);
if (page->index == end_index) {
- char *userpage;
size_t zero_offset = offset_in_page(isize);
if (zero_offset) {
int zeros;
zeros = PAGE_SIZE - zero_offset;
- userpage = kmap_atomic(page);
- memset(userpage + zero_offset, 0, zeros);
+ memzero_page(page, zero_offset, zeros);
flush_dcache_page(page);
- kunmap_atomic(userpage);
}
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0f5b0b12762b..9fb76829a281 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3207,6 +3207,9 @@ void btrfs_update_inode_bytes(struct btrfs_inode *inode,
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+int btrfs_fileattr_set(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct fileattr *fa);
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(u8 *uuid);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d9f20ca3ac7d..dee2dafbc872 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3421,15 +3421,12 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
}
if (page->index == last_byte >> PAGE_SHIFT) {
- char *userpage;
size_t zero_offset = offset_in_page(last_byte);
if (zero_offset) {
iosize = PAGE_SIZE - zero_offset;
- userpage = kmap_atomic(page);
- memset(userpage + zero_offset, 0, iosize);
+ memzero_page(page, zero_offset, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage);
}
}
begin_page_read(fs_info, page);
@@ -3438,14 +3435,11 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
u64 disk_bytenr;
if (cur >= last_byte) {
- char *userpage;
struct extent_state *cached = NULL;
iosize = PAGE_SIZE - pg_offset;
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0, iosize);
+ memzero_page(page, pg_offset, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
unlock_extent_cached(tree, cur,
@@ -3528,13 +3522,10 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
/* we've found a hole, just zero and go on */
if (block_start == EXTENT_MAP_HOLE) {
- char *userpage;
struct extent_state *cached = NULL;
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0, iosize);
+ memzero_page(page, pg_offset, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
@@ -3845,12 +3836,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
}
if (page->index == end_index) {
- char *userpage;
-
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0,
- PAGE_SIZE - pg_offset);
- kunmap_atomic(userpage);
+ memzero_page(page, pg_offset, PAGE_SIZE - pg_offset);
flush_dcache_page(page);
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f5d32d85247a..46f392943f4d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -646,17 +646,12 @@ again:
if (!ret) {
unsigned long offset = offset_in_page(total_compressed);
struct page *page = pages[nr_pages - 1];
- char *kaddr;
/* zero the tail end of the last page, we might be
* sending it down to disk
*/
- if (offset) {
- kaddr = kmap_atomic(page);
- memset(kaddr + offset, 0,
- PAGE_SIZE - offset);
- kunmap_atomic(kaddr);
- }
+ if (offset)
+ memzero_page(page, offset, PAGE_SIZE - offset);
will_compress = 1;
}
}
@@ -4846,7 +4841,6 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
- char *kaddr;
bool only_release_metadata = false;
u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT;
@@ -4938,15 +4932,13 @@ again:
if (offset != blocksize) {
if (!len)
len = blocksize - offset;
- kaddr = kmap(page);
if (front)
- memset(kaddr + (block_start - page_offset(page)),
- 0, offset);
+ memzero_page(page, (block_start - page_offset(page)),
+ offset);
else
- memset(kaddr + (block_start - page_offset(page)) + offset,
- 0, len);
+ memzero_page(page, (block_start - page_offset(page)) + offset,
+ len);
flush_dcache_page(page);
- kunmap(page);
}
ClearPageChecked(page);
set_page_dirty(page);
@@ -6845,11 +6837,9 @@ static noinline int uncompress_inline(struct btrfs_path *path,
* cover that region here.
*/
- if (max_size + pg_offset < PAGE_SIZE) {
- char *map = kmap(page);
- memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
- kunmap(page);
- }
+ if (max_size + pg_offset < PAGE_SIZE)
+ memzero_page(page, pg_offset + max_size,
+ PAGE_SIZE - max_size - pg_offset);
kfree(tmp);
return ret;
}
@@ -8519,7 +8509,6 @@ vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
- char *kaddr;
unsigned long zero_start;
loff_t size;
vm_fault_t ret;
@@ -8633,10 +8622,8 @@ again:
zero_start = PAGE_SIZE;
if (zero_start != PAGE_SIZE) {
- kaddr = kmap(page);
- memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
+ memzero_page(page, zero_start, PAGE_SIZE - zero_start);
flush_dcache_page(page);
- kunmap(page);
}
ClearPageChecked(page);
set_page_dirty(page);
@@ -10638,6 +10625,8 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
.tmpfile = btrfs_tmpfile,
+ .fileattr_get = btrfs_fileattr_get,
+ .fileattr_set = btrfs_fileattr_set,
};
static const struct file_operations btrfs_dir_file_operations = {
@@ -10691,6 +10680,8 @@ static const struct inode_operations btrfs_file_inode_operations = {
.get_acl = btrfs_get_acl,
.set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
+ .fileattr_get = btrfs_fileattr_get,
+ .fileattr_set = btrfs_fileattr_set,
};
static const struct inode_operations btrfs_special_inode_operations = {
.getattr = btrfs_getattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0ba0e4ddaf6b..5dc2fd843ae3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -26,6 +26,7 @@
#include <linux/btrfs.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
+#include <linux/fileattr.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
@@ -153,16 +154,6 @@ void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
new_fl);
}
-static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
-{
- struct btrfs_inode *binode = BTRFS_I(file_inode(file));
- unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);
-
- if (copy_to_user(arg, &flags, sizeof(flags)))
- return -EFAULT;
- return 0;
-}
-
/*
* Check if @flags are a supported and valid set of FS_*_FL flags and that
* the old and new flags are not conflicting
@@ -201,9 +192,22 @@ static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
return 0;
}
-static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
+/*
+ * Set flags/xflags from the internal inode flags. The remaining items of
+ * fsxattr are zeroed.
+ */
+int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
- struct inode *inode = file_inode(file);
+ struct btrfs_inode *binode = BTRFS_I(d_inode(dentry));
+
+ fileattr_fill_flags(fa, btrfs_inode_flags_to_fsflags(binode->flags));
+ return 0;
+}
+
+int btrfs_fileattr_set(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
@@ -213,34 +217,21 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
const char *comp = NULL;
u32 binode_flags;
- if (!inode_owner_or_capable(&init_user_ns, inode))
- return -EPERM;
-
if (btrfs_root_readonly(root))
return -EROFS;
- if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
- return -EFAULT;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
+ if (fileattr_has_fsx(fa))
+ return -EOPNOTSUPP;
- btrfs_inode_lock(inode, 0);
- fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
+ fsflags = btrfs_mask_fsflags_for_type(inode, fa->flags);
old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
-
- ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
- if (ret)
- goto out_unlock;
-
ret = check_fsflags(old_fsflags, fsflags);
if (ret)
- goto out_unlock;
+ return ret;
ret = check_fsflags_compatible(fs_info, fsflags);
if (ret)
- goto out_unlock;
+ return ret;
binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
@@ -263,6 +254,16 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
binode_flags |= BTRFS_INODE_NOATIME;
else
binode_flags &= ~BTRFS_INODE_NOATIME;
+
+ /* If coming from FS_IOC_FSSETXATTR then skip unconverted flags */
+ if (!fa->flags_valid) {
+ /* 1 item for the inode */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+ goto update_flags;
+ }
+
if (fsflags & FS_DIRSYNC_FL)
binode_flags |= BTRFS_INODE_DIRSYNC;
else
@@ -303,10 +304,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
binode_flags |= BTRFS_INODE_NOCOMPRESS;
} else if (fsflags & FS_COMPR_FL) {
- if (IS_SWAPFILE(inode)) {
- ret = -ETXTBSY;
- goto out_unlock;
- }
+ if (IS_SWAPFILE(inode))
+ return -ETXTBSY;
binode_flags |= BTRFS_INODE_COMPRESS;
binode_flags &= ~BTRFS_INODE_NOCOMPRESS;
@@ -323,10 +322,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
* 2 for properties
*/
trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out_unlock;
- }
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
if (comp) {
ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
@@ -344,6 +341,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
}
}
+update_flags:
binode->flags = binode_flags;
btrfs_sync_inode_flags_to_i_flags(inode);
inode_inc_iversion(inode);
@@ -352,44 +350,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
out_end_trans:
btrfs_end_transaction(trans);
- out_unlock:
- btrfs_inode_unlock(inode, 0);
- mnt_drop_write_file(file);
return ret;
}
-/*
- * Translate btrfs internal inode flags to xflags as expected by the
- * FS_IOC_FSGETXATT ioctl. Filter only the supported ones, unknown flags are
- * silently dropped.
- */
-static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
-{
- unsigned int xflags = 0;
-
- if (flags & BTRFS_INODE_APPEND)
- xflags |= FS_XFLAG_APPEND;
- if (flags & BTRFS_INODE_IMMUTABLE)
- xflags |= FS_XFLAG_IMMUTABLE;
- if (flags & BTRFS_INODE_NOATIME)
- xflags |= FS_XFLAG_NOATIME;
- if (flags & BTRFS_INODE_NODUMP)
- xflags |= FS_XFLAG_NODUMP;
- if (flags & BTRFS_INODE_SYNC)
- xflags |= FS_XFLAG_SYNC;
-
- return xflags;
-}
-
-/* Check if @flags are a supported and valid set of FS_XFLAGS_* flags */
-static int check_xflags(unsigned int flags)
-{
- if (flags & ~(FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE | FS_XFLAG_NOATIME |
- FS_XFLAG_NODUMP | FS_XFLAG_SYNC))
- return -EOPNOTSUPP;
- return 0;
-}
-
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation type)
{
@@ -402,111 +365,6 @@ void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
}
-/*
- * Set the xflags from the internal inode flags. The remaining items of fsxattr
- * are zeroed.
- */
-static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
-{
- struct btrfs_inode *binode = BTRFS_I(file_inode(file));
- struct fsxattr fa;
-
- simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags));
- if (copy_to_user(arg, &fa, sizeof(fa)))
- return -EFAULT;
-
- return 0;
-}
-
-static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
-{
- struct inode *inode = file_inode(file);
- struct btrfs_inode *binode = BTRFS_I(inode);
- struct btrfs_root *root = binode->root;
- struct btrfs_trans_handle *trans;
- struct fsxattr fa, old_fa;
- unsigned old_flags;
- unsigned old_i_flags;
- int ret = 0;
-
- if (!inode_owner_or_capable(&init_user_ns, inode))
- return -EPERM;
-
- if (btrfs_root_readonly(root))
- return -EROFS;
-
- if (copy_from_user(&fa, arg, sizeof(fa)))
- return -EFAULT;
-
- ret = check_xflags(fa.fsx_xflags);
- if (ret)
- return ret;
-
- if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
- return -EOPNOTSUPP;
-
- ret = mnt_want_write_file(file);
- if (ret)
- return ret;
-
- btrfs_inode_lock(inode, 0);
-
- old_flags = binode->flags;
- old_i_flags = inode->i_flags;
-
- simple_fill_fsxattr(&old_fa,
- btrfs_inode_flags_to_xflags(binode->flags));
- ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
- if (ret)
- goto out_unlock;
-
- if (fa.fsx_xflags & FS_XFLAG_SYNC)
- binode->flags |= BTRFS_INODE_SYNC;
- else
- binode->flags &= ~BTRFS_INODE_SYNC;
- if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
- binode->flags |= BTRFS_INODE_IMMUTABLE;
- else
- binode->flags &= ~BTRFS_INODE_IMMUTABLE;
- if (fa.fsx_xflags & FS_XFLAG_APPEND)
- binode->flags |= BTRFS_INODE_APPEND;
- else
- binode->flags &= ~BTRFS_INODE_APPEND;
- if (fa.fsx_xflags & FS_XFLAG_NODUMP)
- binode->flags |= BTRFS_INODE_NODUMP;
- else
- binode->flags &= ~BTRFS_INODE_NODUMP;
- if (fa.fsx_xflags & FS_XFLAG_NOATIME)
- binode->flags |= BTRFS_INODE_NOATIME;
- else
- binode->flags &= ~BTRFS_INODE_NOATIME;
-
- /* 1 item for the inode */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out_unlock;
- }
-
- btrfs_sync_inode_flags_to_i_flags(inode);
- inode_inc_iversion(inode);
- inode->i_ctime = current_time(inode);
- ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
-
- btrfs_end_transaction(trans);
-
-out_unlock:
- if (ret) {
- binode->flags = old_flags;
- inode->i_flags = old_i_flags;
- }
-
- btrfs_inode_unlock(inode, 0);
- mnt_drop_write_file(file);
-
- return ret;
-}
-
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
struct inode *inode = file_inode(file);
@@ -4932,10 +4790,6 @@ long btrfs_ioctl(struct file *file, unsigned int
void __user *argp = (void __user *)arg;
switch (cmd) {
- case FS_IOC_GETFLAGS:
- return btrfs_ioctl_getflags(file, argp);
- case FS_IOC_SETFLAGS:
- return btrfs_ioctl_setflags(file, argp);
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
case FS_IOC_GETFSLABEL:
@@ -5061,10 +4915,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_features(fs_info, argp);
case BTRFS_IOC_SET_FEATURES:
return btrfs_ioctl_set_features(file, argp);
- case FS_IOC_FSGETXATTR:
- return btrfs_ioctl_fsgetxattr(file, argp);
- case FS_IOC_FSSETXATTR:
- return btrfs_ioctl_fssetxattr(file, argp);
case BTRFS_IOC_GET_SUBVOL_INFO:
return btrfs_ioctl_get_subvol_info(file, argp);
case BTRFS_IOC_GET_SUBVOL_ROOTREF:
@@ -5084,12 +4934,6 @@ long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
* handling is necessary.
*/
switch (cmd) {
- case FS_IOC32_GETFLAGS:
- cmd = FS_IOC_GETFLAGS;
- break;
- case FS_IOC32_SETFLAGS:
- cmd = FS_IOC_SETFLAGS;
- break;
case FS_IOC32_GETVERSION:
cmd = FS_IOC_GETVERSION;
break;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index bb768c309bc3..244d499ebc72 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1633,7 +1633,8 @@ struct btrfs_plug_cb {
/*
* rbios on the plug list are sorted for easier merging.
*/
-static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int plug_cmp(void *priv, const struct list_head *a,
+ const struct list_head *b)
{
struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
plug_list);
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 58ddc7ed9e84..9178da07cc9c 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -129,12 +129,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
* So what's in the range [500, 4095] corresponds to zeroes.
*/
if (datal < block_size) {
- char *map;
-
- map = kmap(page);
- memset(map + datal, 0, block_size - datal);
+ memzero_page(page, datal, block_size - datal);
flush_dcache_page(page);
- kunmap(page);
}
SetPageUptodate(page);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e4820e88cba0..362d14db1e38 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4141,7 +4141,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
return ret;
}
-static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int extent_cmp(void *priv, const struct list_head *a,
+ const struct list_head *b)
{
struct extent_map *em1, *em2;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bc53939fef48..47d27059d064 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1224,7 +1224,8 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
return 0;
}
-static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int devid_cmp(void *priv, const struct list_head *a,
+ const struct list_head *b)
{
struct btrfs_device *dev1, *dev2;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index d524acf7b3e5..c3fa7d3fa770 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -375,7 +375,6 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in,
unsigned long bytes_left;
unsigned long total_out = 0;
unsigned long pg_offset = 0;
- char *kaddr;
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes_left = destlen;
@@ -455,9 +454,7 @@ next:
* end of the inline extent (destlen) to the end of the page
*/
if (pg_offset < destlen) {
- kaddr = kmap_atomic(dest_page);
- memset(kaddr + pg_offset, 0, destlen - pg_offset);
- kunmap_atomic(kaddr);
+ memzero_page(dest_page, pg_offset, destlen - pg_offset);
}
return ret;
}
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 8e9626d63976..3e26b466476a 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -631,7 +631,6 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
size_t ret2;
unsigned long total_out = 0;
unsigned long pg_offset = 0;
- char *kaddr;
stream = ZSTD_initDStream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
@@ -696,9 +695,7 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
ret = 0;
finish:
if (pg_offset < destlen) {
- kaddr = kmap_atomic(dest_page);
- memset(kaddr + pg_offset, 0, destlen - pg_offset);
- kunmap_atomic(kaddr);
+ memzero_page(dest_page, pg_offset, destlen - pg_offset);
}
return ret;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 0cb7ffd4977c..ea48c01fb76b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1020,11 +1020,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
pgoff_t index;
int sizebits;
- sizebits = -1;
- do {
- sizebits++;
- } while ((size << sizebits) < PAGE_SIZE);
-
+ sizebits = PAGE_SHIFT - __ffs(size);
index = block >> sizebits;
/*
@@ -1264,6 +1260,15 @@ static void bh_lru_install(struct buffer_head *bh)
int i;
check_irqs_on();
+ /*
+ * the refcount of buffer_head in bh_lru prevents dropping the
+ * attached page(i.e., try_to_free_buffers) so it could cause
+ * failing page migration.
+ * Skip putting upcoming bh into bh_lru until migration is done.
+ */
+ if (lru_cache_disabled())
+ return;
+
bh_lru_lock();
b = this_cpu_ptr(&bh_lrus);
@@ -1404,6 +1409,15 @@ __bread_gfp(struct block_device *bdev, sector_t block,
}
EXPORT_SYMBOL(__bread_gfp);
+static void __invalidate_bh_lrus(struct bh_lru *b)
+{
+ int i;
+
+ for (i = 0; i < BH_LRU_SIZE; i++) {
+ brelse(b->bhs[i]);
+ b->bhs[i] = NULL;
+ }
+}
/*
* invalidate_bh_lrus() is called rarely - but not only at unmount.
* This doesn't race because it runs in each cpu either in irq
@@ -1412,16 +1426,12 @@ EXPORT_SYMBOL(__bread_gfp);
static void invalidate_bh_lru(void *arg)
{
struct bh_lru *b = &get_cpu_var(bh_lrus);
- int i;
- for (i = 0; i < BH_LRU_SIZE; i++) {
- brelse(b->bhs[i]);
- b->bhs[i] = NULL;
- }
+ __invalidate_bh_lrus(b);
put_cpu_var(bh_lrus);
}
-static bool has_bh_in_lru(int cpu, void *dummy)
+bool has_bh_in_lru(int cpu, void *dummy)
{
struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
int i;
@@ -1440,6 +1450,16 @@ void invalidate_bh_lrus(void)
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
+void invalidate_bh_lrus_cpu(int cpu)
+{
+ struct bh_lru *b;
+
+ bh_lru_lock();
+ b = per_cpu_ptr(&bh_lrus, cpu);
+ __invalidate_bh_lrus(b);
+ bh_lru_unlock();
+}
+
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile
index 891dedda5905..2227dc2d5498 100644
--- a/fs/cachefiles/Makefile
+++ b/fs/cachefiles/Makefile
@@ -7,6 +7,7 @@ cachefiles-y := \
bind.o \
daemon.o \
interface.o \
+ io.o \
key.o \
main.o \
namei.o \
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 5efa6a3702c0..da3948fdb615 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -319,8 +319,8 @@ static void cachefiles_drop_object(struct fscache_object *_object)
/*
* dispose of a reference to an object
*/
-static void cachefiles_put_object(struct fscache_object *_object,
- enum fscache_obj_ref_trace why)
+void cachefiles_put_object(struct fscache_object *_object,
+ enum fscache_obj_ref_trace why)
{
struct cachefiles_object *object;
struct fscache_cache *cache;
@@ -568,4 +568,5 @@ const struct fscache_cache_ops cachefiles_cache_ops = {
.uncache_page = cachefiles_uncache_page,
.dissociate_pages = cachefiles_dissociate_pages,
.check_consistency = cachefiles_check_consistency,
+ .begin_read_operation = cachefiles_begin_read_operation,
};
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index cf9bd6401c2d..4ed83aa5253b 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -150,6 +150,9 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache,
*/
extern const struct fscache_cache_ops cachefiles_cache_ops;
+void cachefiles_put_object(struct fscache_object *_object,
+ enum fscache_obj_ref_trace why);
+
/*
* key.c
*/
@@ -218,6 +221,12 @@ extern int cachefiles_write_page(struct fscache_storage *, struct page *);
extern void cachefiles_uncache_page(struct fscache_object *, struct page *);
/*
+ * rdwr2.c
+ */
+extern int cachefiles_begin_read_operation(struct netfs_read_request *,
+ struct fscache_retrieval *);
+
+/*
* security.c
*/
extern int cachefiles_get_security_ID(struct cachefiles_cache *cache);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
new file mode 100644
index 000000000000..b13fb45fc3f3
--- /dev/null
+++ b/fs/cachefiles/io.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* kiocb-using read/write
+ *
+ * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/uio.h>
+#include <linux/sched/mm.h>
+#include <linux/netfs.h>
+#include "internal.h"
+
+struct cachefiles_kiocb {
+ struct kiocb iocb;
+ refcount_t ki_refcnt;
+ loff_t start;
+ union {
+ size_t skipped;
+ size_t len;
+ };
+ netfs_io_terminated_t term_func;
+ void *term_func_priv;
+ bool was_async;
+};
+
+static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
+{
+ if (refcount_dec_and_test(&ki->ki_refcnt)) {
+ fput(ki->iocb.ki_filp);
+ kfree(ki);
+ }
+}
+
+/*
+ * Handle completion of a read from the cache.
+ */
+static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2)
+{
+ struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
+
+ _enter("%ld,%ld", ret, ret2);
+
+ if (ki->term_func) {
+ if (ret >= 0)
+ ret += ki->skipped;
+ ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ }
+
+ cachefiles_put_kiocb(ki);
+}
+
+/*
+ * Initiate a read from the cache.
+ */
+static int cachefiles_read(struct netfs_cache_resources *cres,
+ loff_t start_pos,
+ struct iov_iter *iter,
+ bool seek_data,
+ netfs_io_terminated_t term_func,
+ void *term_func_priv)
+{
+ struct cachefiles_kiocb *ki;
+ struct file *file = cres->cache_priv2;
+ unsigned int old_nofs;
+ ssize_t ret = -ENOBUFS;
+ size_t len = iov_iter_count(iter), skipped = 0;
+
+ _enter("%pD,%li,%llx,%zx/%llx",
+ file, file_inode(file)->i_ino, start_pos, len,
+ i_size_read(file->f_inode));
+
+ /* If the caller asked us to seek for data before doing the read, then
+ * we should do that now. If we find a gap, we fill it with zeros.
+ */
+ if (seek_data) {
+ loff_t off = start_pos, off2;
+
+ off2 = vfs_llseek(file, off, SEEK_DATA);
+ if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
+ skipped = 0;
+ ret = off2;
+ goto presubmission_error;
+ }
+
+ if (off2 == -ENXIO || off2 >= start_pos + len) {
+ /* The region is beyond the EOF or there's no more data
+ * in the region, so clear the rest of the buffer and
+ * return success.
+ */
+ iov_iter_zero(len, iter);
+ skipped = len;
+ ret = 0;
+ goto presubmission_error;
+ }
+
+ skipped = off2 - off;
+ iov_iter_zero(skipped, iter);
+ }
+
+ ret = -ENOBUFS;
+ ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
+ if (!ki)
+ goto presubmission_error;
+
+ refcount_set(&ki->ki_refcnt, 2);
+ ki->iocb.ki_filp = file;
+ ki->iocb.ki_pos = start_pos + skipped;
+ ki->iocb.ki_flags = IOCB_DIRECT;
+ ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
+ ki->iocb.ki_ioprio = get_current_ioprio();
+ ki->skipped = skipped;
+ ki->term_func = term_func;
+ ki->term_func_priv = term_func_priv;
+ ki->was_async = true;
+
+ if (ki->term_func)
+ ki->iocb.ki_complete = cachefiles_read_complete;
+
+ get_file(ki->iocb.ki_filp);
+
+ old_nofs = memalloc_nofs_save();
+ ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
+ memalloc_nofs_restore(old_nofs);
+ switch (ret) {
+ case -EIOCBQUEUED:
+ goto in_progress;
+
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /* There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ ret = -EINTR;
+ fallthrough;
+ default:
+ ki->was_async = false;
+ cachefiles_read_complete(&ki->iocb, ret, 0);
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+
+in_progress:
+ cachefiles_put_kiocb(ki);
+ _leave(" = %zd", ret);
+ return ret;
+
+presubmission_error:
+ if (term_func)
+ term_func(term_func_priv, ret < 0 ? ret : skipped, false);
+ return ret;
+}
+
+/*
+ * Handle completion of a write to the cache.
+ */
+static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2)
+{
+ struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
+ struct inode *inode = file_inode(ki->iocb.ki_filp);
+
+ _enter("%ld,%ld", ret, ret2);
+
+ /* Tell lockdep we inherited freeze protection from submission thread */
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
+
+ if (ki->term_func)
+ ki->term_func(ki->term_func_priv, ret, ki->was_async);
+
+ cachefiles_put_kiocb(ki);
+}
+
+/*
+ * Initiate a write to the cache.
+ */
+static int cachefiles_write(struct netfs_cache_resources *cres,
+ loff_t start_pos,
+ struct iov_iter *iter,
+ netfs_io_terminated_t term_func,
+ void *term_func_priv)
+{
+ struct cachefiles_kiocb *ki;
+ struct inode *inode;
+ struct file *file = cres->cache_priv2;
+ unsigned int old_nofs;
+ ssize_t ret = -ENOBUFS;
+ size_t len = iov_iter_count(iter);
+
+ _enter("%pD,%li,%llx,%zx/%llx",
+ file, file_inode(file)->i_ino, start_pos, len,
+ i_size_read(file->f_inode));
+
+ ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
+ if (!ki)
+ goto presubmission_error;
+
+ refcount_set(&ki->ki_refcnt, 2);
+ ki->iocb.ki_filp = file;
+ ki->iocb.ki_pos = start_pos;
+ ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
+ ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
+ ki->iocb.ki_ioprio = get_current_ioprio();
+ ki->start = start_pos;
+ ki->len = len;
+ ki->term_func = term_func;
+ ki->term_func_priv = term_func_priv;
+ ki->was_async = true;
+
+ if (ki->term_func)
+ ki->iocb.ki_complete = cachefiles_write_complete;
+
+ /* Open-code file_start_write here to grab freeze protection, which
+ * will be released by another thread in aio_complete_rw(). Fool
+ * lockdep by telling it the lock got released so that it doesn't
+ * complain about the held lock when we return to userspace.
+ */
+ inode = file_inode(file);
+ __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
+ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+
+ get_file(ki->iocb.ki_filp);
+
+ old_nofs = memalloc_nofs_save();
+ ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
+ memalloc_nofs_restore(old_nofs);
+ switch (ret) {
+ case -EIOCBQUEUED:
+ goto in_progress;
+
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ /* There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ ret = -EINTR;
+ fallthrough;
+ default:
+ ki->was_async = false;
+ cachefiles_write_complete(&ki->iocb, ret, 0);
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+
+in_progress:
+ cachefiles_put_kiocb(ki);
+ _leave(" = %zd", ret);
+ return ret;
+
+presubmission_error:
+ if (term_func)
+ term_func(term_func_priv, -ENOMEM, false);
+ return -ENOMEM;
+}
+
+/*
+ * Prepare a read operation, shortening it to a cached/uncached
+ * boundary as appropriate.
+ */
+static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq,
+ loff_t i_size)
+{
+ struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv;
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ const struct cred *saved_cred;
+ struct file *file = subreq->rreq->cache_resources.cache_priv2;
+ loff_t off, to;
+
+ _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
+
+ object = container_of(op->op.object,
+ struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ if (!file)
+ goto cache_fail_nosec;
+
+ if (subreq->start >= i_size)
+ return NETFS_FILL_WITH_ZEROES;
+
+ cachefiles_begin_secure(cache, &saved_cred);
+
+ off = vfs_llseek(file, subreq->start, SEEK_DATA);
+ if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
+ if (off == (loff_t)-ENXIO)
+ goto download_and_store;
+ goto cache_fail;
+ }
+
+ if (off >= subreq->start + subreq->len)
+ goto download_and_store;
+
+ if (off > subreq->start) {
+ off = round_up(off, cache->bsize);
+ subreq->len = off - subreq->start;
+ goto download_and_store;
+ }
+
+ to = vfs_llseek(file, subreq->start, SEEK_HOLE);
+ if (to < 0 && to >= (loff_t)-MAX_ERRNO)
+ goto cache_fail;
+
+ if (to < subreq->start + subreq->len) {
+ if (subreq->start + subreq->len >= i_size)
+ to = round_up(to, cache->bsize);
+ else
+ to = round_down(to, cache->bsize);
+ subreq->len = to - subreq->start;
+ }
+
+ cachefiles_end_secure(cache, saved_cred);
+ return NETFS_READ_FROM_CACHE;
+
+download_and_store:
+ if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0)
+ __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
+cache_fail:
+ cachefiles_end_secure(cache, saved_cred);
+cache_fail_nosec:
+ return NETFS_DOWNLOAD_FROM_SERVER;
+}
+
+/*
+ * Prepare for a write to occur.
+ */
+static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
+ loff_t *_start, size_t *_len, loff_t i_size)
+{
+ loff_t start = *_start;
+ size_t len = *_len, down;
+
+ /* Round to DIO size */
+ down = start - round_down(start, PAGE_SIZE);
+ *_start = start - down;
+ *_len = round_up(down + len, PAGE_SIZE);
+ return 0;
+}
+
+/*
+ * Clean up an operation.
+ */
+static void cachefiles_end_operation(struct netfs_cache_resources *cres)
+{
+ struct fscache_retrieval *op = cres->cache_priv;
+ struct file *file = cres->cache_priv2;
+
+ _enter("");
+
+ if (file)
+ fput(file);
+ if (op) {
+ fscache_op_complete(&op->op, false);
+ fscache_put_retrieval(op);
+ }
+
+ _leave("");
+}
+
+static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
+ .end_operation = cachefiles_end_operation,
+ .read = cachefiles_read,
+ .write = cachefiles_write,
+ .prepare_read = cachefiles_prepare_read,
+ .prepare_write = cachefiles_prepare_write,
+};
+
+/*
+ * Open the cache file when beginning a cache operation.
+ */
+int cachefiles_begin_read_operation(struct netfs_read_request *rreq,
+ struct fscache_retrieval *op)
+{
+ struct cachefiles_object *object;
+ struct cachefiles_cache *cache;
+ struct path path;
+ struct file *file;
+
+ _enter("");
+
+ object = container_of(op->op.object,
+ struct cachefiles_object, fscache);
+ cache = container_of(object->fscache.cache,
+ struct cachefiles_cache, cache);
+
+ path.mnt = cache->mnt;
+ path.dentry = object->backer;
+ file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
+ d_inode(object->backer), cache->cache_cred);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+ if (!S_ISREG(file_inode(file)->i_mode))
+ goto error_file;
+ if (unlikely(!file->f_op->read_iter) ||
+ unlikely(!file->f_op->write_iter)) {
+ pr_notice("Cache does not support read_iter and write_iter\n");
+ goto error_file;
+ }
+
+ fscache_get_retrieval(op);
+ rreq->cache_resources.cache_priv = op;
+ rreq->cache_resources.cache_priv2 = file;
+ rreq->cache_resources.ops = &cachefiles_netfs_cache_ops;
+ rreq->cookie_debug_id = object->fscache.debug_id;
+ _leave("");
+ return 0;
+
+error_file:
+ fput(file);
+ return -EIO;
+}
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 471e40156065..94df854147d3 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -6,6 +6,7 @@ config CEPH_FS
select LIBCRC32C
select CRYPTO_AES
select CRYPTO
+ select NETFS_SUPPORT
default n
help
Choose Y or M here to include support for mounting the
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 26e66436f005..c1570fada3d8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -12,6 +12,7 @@
#include <linux/signal.h>
#include <linux/iversion.h>
#include <linux/ktime.h>
+#include <linux/netfs.h>
#include "super.h"
#include "mds_client.h"
@@ -61,6 +62,9 @@
(CONGESTION_ON_THRESH(congestion_kb) - \
(CONGESTION_ON_THRESH(congestion_kb) >> 2))
+static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
+ struct page *page, void **_fsdata);
+
static inline struct ceph_snap_context *page_snap_context(struct page *page)
{
if (PagePrivate(page))
@@ -124,8 +128,7 @@ static int ceph_set_page_dirty(struct page *page)
* PagePrivate so that we get invalidatepage callback.
*/
BUG_ON(PagePrivate(page));
- page->private = (unsigned long)snapc;
- SetPagePrivate(page);
+ attach_page_private(page, snapc);
ret = __set_page_dirty_nobuffers(page);
WARN_ON(!PageLocked(page));
@@ -144,19 +147,19 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
{
struct inode *inode;
struct ceph_inode_info *ci;
- struct ceph_snap_context *snapc = page_snap_context(page);
+ struct ceph_snap_context *snapc;
+
+ wait_on_page_fscache(page);
inode = page->mapping->host;
ci = ceph_inode(inode);
- if (offset != 0 || length != PAGE_SIZE) {
+ if (offset != 0 || length != thp_size(page)) {
dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
inode, page, page->index, offset, length);
return;
}
- ceph_invalidate_fscache_page(inode, page);
-
WARN_ON(!PageLocked(page));
if (!PagePrivate(page))
return;
@@ -164,333 +167,222 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
dout("%p invalidatepage %p idx %lu full dirty page\n",
inode, page, page->index);
+ snapc = detach_page_private(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc);
- page->private = 0;
- ClearPagePrivate(page);
}
-static int ceph_releasepage(struct page *page, gfp_t g)
+static int ceph_releasepage(struct page *page, gfp_t gfp)
{
dout("%p releasepage %p idx %lu (%sdirty)\n", page->mapping->host,
page, page->index, PageDirty(page) ? "" : "not ");
- /* Can we release the page from the cache? */
- if (!ceph_release_fscache_page(page, g))
- return 0;
-
+ if (PageFsCache(page)) {
+ if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
+ return 0;
+ wait_on_page_fscache(page);
+ }
return !PagePrivate(page);
}
-/* read a single page, without unlocking it. */
-static int ceph_do_readpage(struct file *filp, struct page *page)
+static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq)
{
- struct inode *inode = file_inode(filp);
+ struct inode *inode = rreq->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_osd_client *osdc = &fsc->client->osdc;
- struct ceph_osd_request *req;
- struct ceph_vino vino = ceph_vino(inode);
- int err = 0;
- u64 off = page_offset(page);
- u64 len = PAGE_SIZE;
-
- if (off >= i_size_read(inode)) {
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- return 0;
- }
-
- if (ci->i_inline_version != CEPH_INLINE_NONE) {
- /*
- * Uptodate inline data should have been added
- * into page cache while getting Fcr caps.
- */
- if (off == 0)
- return -EINVAL;
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- return 0;
- }
-
- err = ceph_readpage_from_fscache(inode, page);
- if (err == 0)
- return -EINPROGRESS;
-
- dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
- vino.ino, vino.snap, filp, off, len, page, page->index);
- req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 0, 1,
- CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL,
- ci->i_truncate_seq, ci->i_truncate_size,
- false);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ struct ceph_file_layout *lo = &ci->i_layout;
+ u32 blockoff;
+ u64 blockno;
- osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
+ /* Expand the start downward */
+ blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+ rreq->start = blockno * lo->stripe_unit;
+ rreq->len += blockoff;
- err = ceph_osdc_start_request(osdc, req, false);
- if (!err)
- err = ceph_osdc_wait_request(osdc, req);
-
- ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
- req->r_end_latency, err);
-
- ceph_osdc_put_request(req);
- dout("readpage result %d\n", err);
-
- if (err == -ENOENT)
- err = 0;
- if (err < 0) {
- ceph_fscache_readpage_cancel(inode, page);
- if (err == -EBLOCKLISTED)
- fsc->blocklisted = true;
- goto out;
- }
- if (err < PAGE_SIZE)
- /* zero fill remainder of page */
- zero_user_segment(page, err, PAGE_SIZE);
- else
- flush_dcache_page(page);
-
- SetPageUptodate(page);
- ceph_readpage_to_fscache(inode, page);
-
-out:
- return err < 0 ? err : 0;
+ /* Now, round up the length to the next block */
+ rreq->len = roundup(rreq->len, lo->stripe_unit);
}
-static int ceph_readpage(struct file *filp, struct page *page)
+static bool ceph_netfs_clamp_length(struct netfs_read_subrequest *subreq)
{
- int r = ceph_do_readpage(filp, page);
- if (r != -EINPROGRESS)
- unlock_page(page);
- else
- r = 0;
- return r;
+ struct inode *inode = subreq->rreq->mapping->host;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 objno, objoff;
+ u32 xlen;
+
+ /* Truncate the extent at the end of the current block */
+ ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
+ &objno, &objoff, &xlen);
+ subreq->len = min(xlen, fsc->mount_options->rsize);
+ return true;
}
-/*
- * Finish an async read(ahead) op.
- */
-static void finish_read(struct ceph_osd_request *req)
+static void finish_netfs_read(struct ceph_osd_request *req)
{
- struct inode *inode = req->r_inode;
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_osd_data *osd_data;
- int rc = req->r_result <= 0 ? req->r_result : 0;
- int bytes = req->r_result >= 0 ? req->r_result : 0;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
+ struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+ struct netfs_read_subrequest *subreq = req->r_priv;
int num_pages;
- int i;
+ int err = req->r_result;
- dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
- if (rc == -EBLOCKLISTED)
- ceph_inode_to_client(inode)->blocklisted = true;
+ ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency,
+ req->r_end_latency, err);
- /* unlock all pages, zeroing any data we didn't read */
- osd_data = osd_req_op_extent_osd_data(req, 0);
- BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
- num_pages = calc_pages_for((u64)osd_data->alignment,
- (u64)osd_data->length);
- for (i = 0; i < num_pages; i++) {
- struct page *page = osd_data->pages[i];
-
- if (rc < 0 && rc != -ENOENT) {
- ceph_fscache_readpage_cancel(inode, page);
- goto unlock;
- }
- if (bytes < (int)PAGE_SIZE) {
- /* zero (remainder of) page */
- int s = bytes < 0 ? 0 : bytes;
- zero_user_segment(page, s, PAGE_SIZE);
- }
- dout("finish_read %p uptodate %p idx %lu\n", inode, page,
- page->index);
- flush_dcache_page(page);
- SetPageUptodate(page);
- ceph_readpage_to_fscache(inode, page);
-unlock:
- unlock_page(page);
- put_page(page);
- bytes -= PAGE_SIZE;
- }
+ dout("%s: result %d subreq->len=%zu i_size=%lld\n", __func__, req->r_result,
+ subreq->len, i_size_read(req->r_inode));
- ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
- req->r_end_latency, rc);
+ /* no object means success but no data */
+ if (err == -ENOENT)
+ err = 0;
+ else if (err == -EBLOCKLISTED)
+ fsc->blocklisted = true;
+
+ if (err >= 0 && err < subreq->len)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+
+ netfs_subreq_terminated(subreq, err, true);
- kfree(osd_data->pages);
+ num_pages = calc_pages_for(osd_data->alignment, osd_data->length);
+ ceph_put_page_vector(osd_data->pages, num_pages, false);
+ iput(req->r_inode);
}
-/*
- * start an async read(ahead) operation. return nr_pages we submitted
- * a read for on success, or negative error code.
- */
-static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
- struct list_head *page_list, int max)
+static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq)
{
- struct ceph_osd_client *osdc =
- &ceph_inode_to_client(inode)->client->osdc;
+ struct netfs_read_request *rreq = subreq->rreq;
+ struct inode *inode = rreq->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct page *page = lru_to_page(page_list);
- struct ceph_vino vino;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req;
- u64 off;
- u64 len;
- int i;
+ struct ceph_vino vino = ceph_vino(inode);
+ struct iov_iter iter;
struct page **pages;
- pgoff_t next_index;
- int nr_pages = 0;
- int got = 0;
- int ret = 0;
-
- if (!rw_ctx) {
- /* caller of readpages does not hold buffer and read caps
- * (fadvise, madvise and readahead cases) */
- int want = CEPH_CAP_FILE_CACHE;
- ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want,
- true, &got);
- if (ret < 0) {
- dout("start_read %p, error getting cap\n", inode);
- } else if (!(got & want)) {
- dout("start_read %p, no cache cap\n", inode);
- ret = 0;
- }
- if (ret <= 0) {
- if (got)
- ceph_put_cap_refs(ci, got);
- while (!list_empty(page_list)) {
- page = lru_to_page(page_list);
- list_del(&page->lru);
- put_page(page);
- }
- return ret;
- }
- }
-
- off = (u64) page_offset(page);
+ size_t page_off;
+ int err = 0;
+ u64 len = subreq->len;
- /* count pages */
- next_index = page->index;
- list_for_each_entry_reverse(page, page_list, lru) {
- if (page->index != next_index)
- break;
- nr_pages++;
- next_index++;
- if (max && nr_pages == max)
- break;
- }
- len = nr_pages << PAGE_SHIFT;
- dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
- off, len);
- vino = ceph_vino(inode);
- req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
- 0, 1, CEPH_OSD_OP_READ,
- CEPH_OSD_FLAG_READ, NULL,
- ci->i_truncate_seq, ci->i_truncate_size,
- false);
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
+ 0, 1, CEPH_OSD_OP_READ,
+ CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
+ NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
if (IS_ERR(req)) {
- ret = PTR_ERR(req);
+ err = PTR_ERR(req);
+ req = NULL;
goto out;
}
- /* build page vector */
- nr_pages = calc_pages_for(0, len);
- pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- goto out_put;
- }
- for (i = 0; i < nr_pages; ++i) {
- page = list_entry(page_list->prev, struct page, lru);
- BUG_ON(PageLocked(page));
- list_del(&page->lru);
-
- dout("start_read %p adding %p idx %lu\n", inode, page,
- page->index);
- if (add_to_page_cache_lru(page, &inode->i_data, page->index,
- GFP_KERNEL)) {
- ceph_fscache_uncache_page(inode, page);
- put_page(page);
- dout("start_read %p add_to_page_cache failed %p\n",
- inode, page);
- nr_pages = i;
- if (nr_pages > 0) {
- len = nr_pages << PAGE_SHIFT;
- osd_req_op_extent_update(req, 0, len);
- break;
- }
- goto out_pages;
- }
- pages[i] = page;
+ dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
+ iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len);
+ err = iov_iter_get_pages_alloc(&iter, &pages, len, &page_off);
+ if (err < 0) {
+ dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
+ goto out;
}
+
+ /* should always give us a page-aligned read */
+ WARN_ON_ONCE(page_off);
+ len = err;
+
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
- req->r_callback = finish_read;
+ req->r_callback = finish_netfs_read;
+ req->r_priv = subreq;
req->r_inode = inode;
+ ihold(inode);
- dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
- ret = ceph_osdc_start_request(osdc, req, false);
- if (ret < 0)
- goto out_pages;
+ err = ceph_osdc_start_request(req->r_osdc, req, false);
+ if (err)
+ iput(inode);
+out:
ceph_osdc_put_request(req);
+ if (err)
+ netfs_subreq_terminated(subreq, err, false);
+ dout("%s: result %d\n", __func__, err);
+}
- /* After adding locked pages to page cache, the inode holds cache cap.
- * So we can drop our cap refs. */
- if (got)
- ceph_put_cap_refs(ci, got);
+static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file)
+{
+}
- return nr_pages;
+static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
+{
+ struct inode *inode = mapping->host;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int got = (uintptr_t)priv;
-out_pages:
- for (i = 0; i < nr_pages; ++i) {
- ceph_fscache_readpage_cancel(inode, pages[i]);
- unlock_page(pages[i]);
- }
- ceph_put_page_vector(pages, nr_pages, false);
-out_put:
- ceph_osdc_put_request(req);
-out:
if (got)
ceph_put_cap_refs(ci, got);
- return ret;
}
+const struct netfs_read_request_ops ceph_netfs_read_ops = {
+ .init_rreq = ceph_init_rreq,
+ .is_cache_enabled = ceph_is_cache_enabled,
+ .begin_cache_operation = ceph_begin_cache_operation,
+ .issue_op = ceph_netfs_issue_op,
+ .expand_readahead = ceph_netfs_expand_readahead,
+ .clamp_length = ceph_netfs_clamp_length,
+ .check_write_begin = ceph_netfs_check_write_begin,
+ .cleanup = ceph_readahead_cleanup,
+};
-/*
- * Read multiple pages. Leave pages we don't read + unlock in page_list;
- * the caller (VM) cleans them up.
- */
-static int ceph_readpages(struct file *file, struct address_space *mapping,
- struct list_head *page_list, unsigned nr_pages)
+/* read a single page, without unlocking it. */
+static int ceph_readpage(struct file *file, struct page *page)
{
struct inode *inode = file_inode(file);
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_file_info *fi = file->private_data;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_vino vino = ceph_vino(inode);
+ u64 off = page_offset(page);
+ u64 len = thp_size(page);
+
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ /*
+ * Uptodate inline data should have been added
+ * into page cache while getting Fcr caps.
+ */
+ if (off == 0) {
+ unlock_page(page);
+ return -EINVAL;
+ }
+ zero_user_segment(page, 0, thp_size(page));
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+ }
+
+ dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
+ vino.ino, vino.snap, file, off, len, page, page->index);
+
+ return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
+}
+
+static void ceph_readahead(struct readahead_control *ractl)
+{
+ struct inode *inode = file_inode(ractl->file);
+ struct ceph_file_info *fi = ractl->file->private_data;
struct ceph_rw_context *rw_ctx;
- int rc = 0;
- int max = 0;
+ int got = 0;
+ int ret = 0;
if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
- return -EINVAL;
+ return;
- rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
- &nr_pages);
+ rw_ctx = ceph_find_rw_context(fi);
+ if (!rw_ctx) {
+ /*
+ * readahead callers do not necessarily hold Fcb caps
+ * (e.g. fadvise, madvise).
+ */
+ int want = CEPH_CAP_FILE_CACHE;
- if (rc == 0)
- goto out;
+ ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
+ if (ret < 0)
+ dout("start_read %p, error getting cap\n", inode);
+ else if (!(got & want))
+ dout("start_read %p, no cache cap\n", inode);
- rw_ctx = ceph_find_rw_context(fi);
- max = fsc->mount_options->rsize >> PAGE_SHIFT;
- dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
- inode, file, rw_ctx, nr_pages, max);
- while (!list_empty(page_list)) {
- rc = start_read(inode, rw_ctx, page_list, max);
- if (rc < 0)
- goto out;
+ if (ret <= 0)
+ return;
}
-out:
- ceph_fscache_readpages_cancel(inode, page_list);
-
- dout("readpages %p file %p ret %d\n", inode, file, rc);
- return rc;
+ netfs_readahead(ractl, &ceph_netfs_read_ops, (void *)(uintptr_t)got);
}
struct ceph_writeback_ctl
@@ -585,8 +477,8 @@ static u64 get_writepages_data_length(struct inode *inode,
spin_unlock(&ci->i_ceph_lock);
WARN_ON(!found);
}
- if (end > page_offset(page) + PAGE_SIZE)
- end = page_offset(page) + PAGE_SIZE;
+ if (end > page_offset(page) + thp_size(page))
+ end = page_offset(page) + thp_size(page);
return end > start ? end - start : 0;
}
@@ -604,7 +496,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
struct ceph_snap_context *snapc, *oldest;
loff_t page_off = page_offset(page);
int err;
- loff_t len = PAGE_SIZE;
+ loff_t len = thp_size(page);
struct ceph_writeback_ctl ceph_wbc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_osd_request *req;
@@ -632,7 +524,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
/* is this a partial page at end of file? */
if (page_off >= ceph_wbc.i_size) {
dout("%p page eof %llu\n", page, ceph_wbc.i_size);
- page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
+ page->mapping->a_ops->invalidatepage(page, 0, thp_size(page));
return 0;
}
@@ -658,7 +550,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
}
/* it may be a short write due to an object boundary */
- WARN_ON_ONCE(len > PAGE_SIZE);
+ WARN_ON_ONCE(len > thp_size(page));
osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
@@ -667,7 +559,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
if (!err)
err = ceph_osdc_wait_request(osdc, req);
- ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+ ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
ceph_osdc_put_request(req);
@@ -695,8 +587,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage cleaned page %p\n", page);
err = 0; /* vfs expects us to return 0 */
}
- page->private = 0;
- ClearPagePrivate(page);
+ oldest = detach_page_private(page);
+ WARN_ON_ONCE(oldest != snapc);
end_page_writeback(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc); /* page's reference */
@@ -755,7 +647,7 @@ static void writepages_finish(struct ceph_osd_request *req)
ceph_clear_error_write(ci);
}
- ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+ ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, rc);
/*
@@ -788,11 +680,9 @@ static void writepages_finish(struct ceph_osd_request *req)
clear_bdi_congested(inode_to_bdi(inode),
BLK_RW_ASYNC);
- ceph_put_snap_context(page_snap_context(page));
- page->private = 0;
- ClearPagePrivate(page);
- dout("unlocking %p\n", page);
+ ceph_put_snap_context(detach_page_private(page));
end_page_writeback(page);
+ dout("unlocking %p\n", page);
if (remove_page)
generic_error_remove_page(inode->i_mapping,
@@ -949,7 +839,7 @@ get_more_pages:
page_offset(page) >= i_size_read(inode)) &&
clear_page_dirty_for_io(page))
mapping->a_ops->invalidatepage(page,
- 0, PAGE_SIZE);
+ 0, thp_size(page));
unlock_page(page);
continue;
}
@@ -1038,7 +928,7 @@ get_more_pages:
pages[locked_pages++] = page;
pvec.pages[i] = NULL;
- len += PAGE_SIZE;
+ len += thp_size(page);
}
/* did we get anything? */
@@ -1087,7 +977,7 @@ new_request:
BUG_ON(IS_ERR(req));
}
BUG_ON(len < page_offset(pages[locked_pages - 1]) +
- PAGE_SIZE - offset);
+ thp_size(page) - offset);
req->r_callback = writepages_finish;
req->r_inode = inode;
@@ -1117,7 +1007,7 @@ new_request:
}
set_page_writeback(pages[i]);
- len += PAGE_SIZE;
+ len += thp_size(page);
}
if (ceph_wbc.size_stable) {
@@ -1126,7 +1016,7 @@ new_request:
/* writepages_finish() clears writeback pages
* according to the data length, so make sure
* data length covers all locked pages */
- u64 min_len = len + 1 - PAGE_SIZE;
+ u64 min_len = len + 1 - thp_size(page);
len = get_writepages_data_length(inode, pages[i - 1],
offset);
len = max(len, min_len);
@@ -1302,6 +1192,31 @@ ceph_find_incompatible(struct page *page)
return NULL;
}
+static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
+ struct page *page, void **_fsdata)
+{
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_snap_context *snapc;
+
+ snapc = ceph_find_incompatible(page);
+ if (snapc) {
+ int r;
+
+ unlock_page(page);
+ put_page(page);
+ if (IS_ERR(snapc))
+ return PTR_ERR(snapc);
+
+ ceph_queue_writeback(inode);
+ r = wait_event_killable(ci->i_cap_wq,
+ context_is_writeable_or_written(inode, snapc));
+ ceph_put_snap_context(snapc);
+ return r == 0 ? -EAGAIN : r;
+ }
+ return 0;
+}
+
/*
* We are only allowed to write into/dirty the page if the page is
* clean, or already dirty within the same snap context.
@@ -1312,75 +1227,47 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_snap_context *snapc;
struct page *page = NULL;
pgoff_t index = pos >> PAGE_SHIFT;
- int pos_in_page = pos & ~PAGE_MASK;
- int r = 0;
+ int r;
- dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
-
- for (;;) {
+ /*
+ * Uninlining should have already been done and everything updated, EXCEPT
+ * for inline_version sent to the MDS.
+ */
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page) {
- r = -ENOMEM;
- break;
- }
-
- snapc = ceph_find_incompatible(page);
- if (snapc) {
- if (IS_ERR(snapc)) {
- r = PTR_ERR(snapc);
- break;
- }
- unlock_page(page);
- put_page(page);
- page = NULL;
- ceph_queue_writeback(inode);
- r = wait_event_killable(ci->i_cap_wq,
- context_is_writeable_or_written(inode, snapc));
- ceph_put_snap_context(snapc);
- if (r != 0)
- break;
- continue;
- }
-
- if (PageUptodate(page)) {
- dout(" page %p already uptodate\n", page);
- break;
- }
+ if (!page)
+ return -ENOMEM;
/*
- * In some cases we don't need to read at all:
- * - full page write
- * - write that lies completely beyond EOF
- * - write that covers the the page from start to EOF or beyond it
+ * The inline_version on a new inode is set to 1. If that's the
+ * case, then the page is brand new and isn't yet Uptodate.
*/
- if ((pos_in_page == 0 && len == PAGE_SIZE) ||
- (pos >= i_size_read(inode)) ||
- (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) {
- zero_user_segments(page, 0, pos_in_page,
- pos_in_page + len, PAGE_SIZE);
- break;
+ r = 0;
+ if (index == 0 && ci->i_inline_version != 1) {
+ if (!PageUptodate(page)) {
+ WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
+ ci->i_inline_version);
+ r = -EINVAL;
+ }
+ goto out;
}
-
- /*
- * We need to read it. If we get back -EINPROGRESS, then the page was
- * handed off to fscache and it will be unlocked when the read completes.
- * Refind the page in that case so we can reacquire the page lock. Otherwise
- * we got a hard error or the read was completed synchronously.
- */
- r = ceph_do_readpage(file, page);
- if (r != -EINPROGRESS)
- break;
+ zero_user_segment(page, 0, thp_size(page));
+ SetPageUptodate(page);
+ goto out;
}
+ r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
+ &ceph_netfs_read_ops, NULL);
+out:
+ if (r == 0)
+ wait_on_page_fscache(page);
if (r < 0) {
- if (page) {
- unlock_page(page);
+ if (page)
put_page(page);
- }
} else {
+ WARN_ON_ONCE(!PageLocked(page));
*pagep = page;
}
return r;
@@ -1438,7 +1325,7 @@ static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
const struct address_space_operations ceph_aops = {
.readpage = ceph_readpage,
- .readpages = ceph_readpages,
+ .readahead = ceph_readahead,
.writepage = ceph_writepage,
.writepages = ceph_writepages_start,
.write_begin = ceph_write_begin,
@@ -1470,7 +1357,6 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
- struct page *pinned_page = NULL;
loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT;
int want, got, err;
sigset_t oldset;
@@ -1478,21 +1364,20 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
ceph_block_sigs(&oldset);
- dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
- inode, ceph_vinop(inode), off, (size_t)PAGE_SIZE);
+ dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
+ inode, ceph_vinop(inode), off);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
got = 0;
- err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1,
- &got, &pinned_page);
+ err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1, &got);
if (err < 0)
goto out_restore;
- dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
- inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
+ dout("filemap_fault %p %llu got cap refs on %s\n",
+ inode, off, ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE) {
@@ -1500,14 +1385,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
ceph_del_rw_context(fi, &rw_ctx);
- dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
- inode, off, (size_t)PAGE_SIZE,
- ceph_cap_string(got), ret);
+ dout("filemap_fault %p %llu drop cap refs %s ret %x\n",
+ inode, off, ceph_cap_string(got), ret);
} else
err = -EAGAIN;
- if (pinned_page)
- put_page(pinned_page);
ceph_put_cap_refs(ci, got);
if (err != -EAGAIN)
@@ -1542,8 +1424,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
- dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
- inode, off, (size_t)PAGE_SIZE, ret);
+ dout("filemap_fault %p %llu read inline data ret %x\n",
+ inode, off, ret);
}
out_restore:
ceph_restore_sigs(&oldset);
@@ -1553,9 +1435,6 @@ out_restore:
return ret;
}
-/*
- * Reuse write_begin here for simplicity.
- */
static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
@@ -1591,10 +1470,10 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
goto out_free;
}
- if (off + PAGE_SIZE <= size)
- len = PAGE_SIZE;
+ if (off + thp_size(page) <= size)
+ len = thp_size(page);
else
- len = size & ~PAGE_MASK;
+ len = offset_in_thp(page, size);
dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
inode, ceph_vinop(inode), off, len, size);
@@ -1604,8 +1483,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len,
- &got, NULL);
+ err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len, &got);
if (err < 0)
goto out_free;
@@ -1832,7 +1710,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
- ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+ ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
out_put:
@@ -2057,6 +1935,10 @@ int ceph_pool_perm_check(struct inode *inode, int need)
s64 pool;
int ret, flags;
+ /* Only need to do this for regular files */
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+
if (ci->i_vino.snap != CEPH_NOSNAP) {
/*
* Pool permission check needs to write to the first object.
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 2f5cb6bc78e1..9cfadbb86568 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -173,7 +173,6 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
ci->fscache = NULL;
- fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
fscache_relinquish_cookie(cookie, &ci->i_vino, false);
}
@@ -194,7 +193,6 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
dout("fscache_file_set_cookie %p %p disabling cache\n",
inode, filp);
fscache_disable_cookie(ci->fscache, &ci->i_vino, false);
- fscache_uncache_all_inode_pages(ci->fscache, inode);
} else {
fscache_enable_cookie(ci->fscache, &ci->i_vino, i_size_read(inode),
ceph_fscache_can_enable, inode);
@@ -205,108 +203,6 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
}
}
-static void ceph_readpage_from_fscache_complete(struct page *page, void *data, int error)
-{
- if (!error)
- SetPageUptodate(page);
-
- unlock_page(page);
-}
-
-static inline bool cache_valid(struct ceph_inode_info *ci)
-{
- return ci->i_fscache_gen == ci->i_rdcache_gen;
-}
-
-
-/* Atempt to read from the fscache,
- *
- * This function is called from the readpage_nounlock context. DO NOT attempt to
- * unlock the page here (or in the callback).
- */
-int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int ret;
-
- if (!cache_valid(ci))
- return -ENOBUFS;
-
- ret = fscache_read_or_alloc_page(ci->fscache, page,
- ceph_readpage_from_fscache_complete, NULL,
- GFP_KERNEL);
-
- switch (ret) {
- case 0: /* Page found */
- dout("page read submitted\n");
- return 0;
- case -ENOBUFS: /* Pages were not found, and can't be */
- case -ENODATA: /* Pages were not found */
- dout("page/inode not in cache\n");
- return ret;
- default:
- dout("%s: unknown error ret = %i\n", __func__, ret);
- return ret;
- }
-}
-
-int ceph_readpages_from_fscache(struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int ret;
-
- if (!cache_valid(ci))
- return -ENOBUFS;
-
- ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
- ceph_readpage_from_fscache_complete,
- NULL, mapping_gfp_mask(mapping));
-
- switch (ret) {
- case 0: /* All pages found */
- dout("all-page read submitted\n");
- return 0;
- case -ENOBUFS: /* Some pages were not found, and can't be */
- case -ENODATA: /* some pages were not found */
- dout("page/inode not in cache\n");
- return ret;
- default:
- dout("%s: unknown error ret = %i\n", __func__, ret);
- return ret;
- }
-}
-
-void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int ret;
-
- if (!PageFsCache(page))
- return;
-
- if (!cache_valid(ci))
- return;
-
- ret = fscache_write_page(ci->fscache, page, i_size_read(inode),
- GFP_KERNEL);
- if (ret)
- fscache_uncache_page(ci->fscache, page);
-}
-
-void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
-
- if (!PageFsCache(page))
- return;
-
- fscache_wait_on_page_write(ci->fscache, page);
- fscache_uncache_page(ci->fscache, page);
-}
-
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
{
if (fscache_cookie_valid(fsc->fscache)) {
@@ -329,24 +225,3 @@ void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
}
fsc->fscache = NULL;
}
-
-/*
- * caller should hold CEPH_CAP_FILE_{RD,CACHE}
- */
-void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
-{
- if (cache_valid(ci))
- return;
-
- /* resue i_truncate_mutex. There should be no pending
- * truncate while the caller holds CEPH_CAP_FILE_RD */
- mutex_lock(&ci->i_truncate_mutex);
- if (!cache_valid(ci)) {
- if (fscache_check_consistency(ci->fscache, &ci->i_vino))
- fscache_invalidate(ci->fscache);
- spin_lock(&ci->i_ceph_lock);
- ci->i_fscache_gen = ci->i_rdcache_gen;
- spin_unlock(&ci->i_ceph_lock);
- }
- mutex_unlock(&ci->i_truncate_mutex);
-}
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
index 89dbdd1eb14a..1409d6149281 100644
--- a/fs/ceph/cache.h
+++ b/fs/ceph/cache.h
@@ -9,6 +9,8 @@
#ifndef _CEPH_CACHE_H
#define _CEPH_CACHE_H
+#include <linux/netfs.h>
+
#ifdef CONFIG_CEPH_FSCACHE
extern struct fscache_netfs ceph_cache_netfs;
@@ -29,54 +31,37 @@ int ceph_readpages_from_fscache(struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages);
-void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
-void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
ci->fscache = NULL;
- ci->i_fscache_gen = 0;
}
-static inline void ceph_fscache_invalidate(struct inode *inode)
+static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci)
{
- fscache_invalidate(ceph_inode(inode)->fscache);
+ return ci->fscache;
}
-static inline void ceph_fscache_uncache_page(struct inode *inode,
- struct page *page)
+static inline void ceph_fscache_invalidate(struct inode *inode)
{
- struct ceph_inode_info *ci = ceph_inode(inode);
- return fscache_uncache_page(ci->fscache, page);
+ fscache_invalidate(ceph_inode(inode)->fscache);
}
-static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
+static inline bool ceph_is_cache_enabled(struct inode *inode)
{
- struct inode* inode = page->mapping->host;
- struct ceph_inode_info *ci = ceph_inode(inode);
- return fscache_maybe_release_page(ci->fscache, page, gfp);
-}
+ struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(inode));
-static inline void ceph_fscache_readpage_cancel(struct inode *inode,
- struct page *page)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- if (fscache_cookie_valid(ci->fscache) && PageFsCache(page))
- __fscache_uncache_page(ci->fscache, page);
+ if (!cookie)
+ return false;
+ return fscache_cookie_enabled(cookie);
}
-static inline void ceph_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
+static inline int ceph_begin_cache_operation(struct netfs_read_request *rreq)
{
- struct ceph_inode_info *ci = ceph_inode(inode);
- return fscache_readpages_cancel(ci->fscache, pages);
-}
+ struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode));
-static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
-{
- ci->i_fscache_gen = ci->i_rdcache_gen - 1;
+ return fscache_begin_read_operation(rreq, cookie);
}
-
#else
static inline int ceph_fscache_register(void)
@@ -102,6 +87,11 @@ static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
}
+static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci)
+{
+ return NULL;
+}
+
static inline void ceph_fscache_register_inode_cookie(struct inode *inode)
{
}
@@ -115,62 +105,19 @@ static inline void ceph_fscache_file_set_cookie(struct inode *inode,
{
}
-static inline void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
-{
-}
-
-static inline void ceph_fscache_uncache_page(struct inode *inode,
- struct page *pages)
-{
-}
-
-static inline int ceph_readpage_from_fscache(struct inode* inode,
- struct page *page)
-{
- return -ENOBUFS;
-}
-
-static inline int ceph_readpages_from_fscache(struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
-{
- return -ENOBUFS;
-}
-
-static inline void ceph_readpage_to_fscache(struct inode *inode,
- struct page *page)
-{
-}
-
static inline void ceph_fscache_invalidate(struct inode *inode)
{
}
-static inline void ceph_invalidate_fscache_page(struct inode *inode,
- struct page *page)
+static inline bool ceph_is_cache_enabled(struct inode *inode)
{
+ return false;
}
-static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
-{
- return 1;
-}
-
-static inline void ceph_fscache_readpage_cancel(struct inode *inode,
- struct page *page)
-{
-}
-
-static inline void ceph_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
-{
-}
-
-static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
+static inline int ceph_begin_cache_operation(struct netfs_read_request *rreq)
{
+ return -ENOBUFS;
}
-
#endif
-#endif
+#endif /* _CEPH_CACHE_H */
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 570731c4d019..a5e93b185515 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1390,7 +1390,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
arg->flush_tid = flush_tid;
arg->oldest_flush_tid = oldest_flush_tid;
- arg->size = inode->i_size;
+ arg->size = i_size_read(inode);
ci->i_reported_size = arg->size;
arg->max_size = ci->i_wanted_max_size;
if (cap == ci->i_auth_cap) {
@@ -1867,6 +1867,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
u32 invalidating_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
+ ceph_fscache_invalidate(inode);
invalidate_mapping_pages(&inode->i_data, 0, -1);
spin_lock(&ci->i_ceph_lock);
@@ -1884,7 +1885,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
bool __ceph_should_report_size(struct ceph_inode_info *ci)
{
- loff_t size = ci->vfs_inode.i_size;
+ loff_t size = i_size_read(&ci->vfs_inode);
/* mds will adjust max size according to the reported size */
if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
return false;
@@ -2730,10 +2731,6 @@ again:
*got = need | want;
else
*got = need;
- if (S_ISREG(inode->i_mode) &&
- (need & CEPH_CAP_FILE_RD) &&
- !(*got & CEPH_CAP_FILE_CACHE))
- ceph_disable_fscache_readpage(ci);
ceph_take_cap_refs(ci, *got, true);
ret = 1;
}
@@ -2858,8 +2855,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
-int ceph_get_caps(struct file *filp, int need, int want,
- loff_t endoff, int *got, struct page **pinned_page)
+int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got)
{
struct ceph_file_info *fi = filp->private_data;
struct inode *inode = file_inode(filp);
@@ -2957,11 +2953,11 @@ int ceph_get_caps(struct file *filp, int need, int want,
struct page *page =
find_get_page(inode->i_mapping, 0);
if (page) {
- if (PageUptodate(page)) {
- *pinned_page = page;
- break;
- }
+ bool uptodate = PageUptodate(page);
+
put_page(page);
+ if (uptodate)
+ break;
}
/*
* drop cap refs first because getattr while
@@ -2983,11 +2979,6 @@ int ceph_get_caps(struct file *filp, int need, int want,
}
break;
}
-
- if (S_ISREG(ci->vfs_inode.i_mode) &&
- (_got & CEPH_CAP_FILE_RD) && (_got & CEPH_CAP_FILE_CACHE))
- ceph_fscache_revalidate_cookie(ci);
-
*got = _got;
return 0;
}
@@ -3308,7 +3299,7 @@ static void handle_cap_grant(struct inode *inode,
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
- inode->i_size);
+ i_size_read(inode));
/*
@@ -3358,7 +3349,13 @@ static void handle_cap_grant(struct inode *inode,
if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
(extra_info->issued & CEPH_CAP_AUTH_EXCL) == 0) {
- inode->i_mode = le32_to_cpu(grant->mode);
+ umode_t mode = le32_to_cpu(grant->mode);
+
+ if (inode_wrong_type(inode, mode))
+ pr_warn_once("inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n",
+ ceph_vinop(inode), inode->i_mode, mode);
+ else
+ inode->i_mode = mode;
inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
ci->i_btime = extra_info->btime;
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 66989c880adb..425f3356332a 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -162,34 +162,34 @@ static int metric_show(struct seq_file *s, void *p)
seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n");
seq_printf(s, "-----------------------------------------------------------------------------------\n");
- spin_lock(&m->read_latency_lock);
+ spin_lock(&m->read_metric_lock);
total = m->total_reads;
sum = m->read_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->read_latency_min;
max = m->read_latency_max;
sq = m->read_latency_sq_sum;
- spin_unlock(&m->read_latency_lock);
+ spin_unlock(&m->read_metric_lock);
CEPH_METRIC_SHOW("read", total, avg, min, max, sq);
- spin_lock(&m->write_latency_lock);
+ spin_lock(&m->write_metric_lock);
total = m->total_writes;
sum = m->write_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->write_latency_min;
max = m->write_latency_max;
sq = m->write_latency_sq_sum;
- spin_unlock(&m->write_latency_lock);
+ spin_unlock(&m->write_metric_lock);
CEPH_METRIC_SHOW("write", total, avg, min, max, sq);
- spin_lock(&m->metadata_latency_lock);
+ spin_lock(&m->metadata_metric_lock);
total = m->total_metadatas;
sum = m->metadata_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->metadata_latency_min;
max = m->metadata_latency_max;
sq = m->metadata_latency_sq_sum;
- spin_unlock(&m->metadata_latency_lock);
+ spin_unlock(&m->metadata_metric_lock);
CEPH_METRIC_SHOW("metadata", total, avg, min, max, sq);
seq_printf(s, "\n");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 83d9358854fb..5624fae7a603 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -631,10 +631,12 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
+ break;
case SEEK_SET:
break;
case SEEK_END:
retval = -EOPNOTSUPP;
+ goto out;
default:
goto out;
}
@@ -665,8 +667,8 @@ out:
/*
* Handle lookups for the hidden .snap directory.
*/
-int ceph_handle_snapdir(struct ceph_mds_request *req,
- struct dentry *dentry, int err)
+struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */
@@ -674,16 +676,17 @@ int ceph_handle_snapdir(struct ceph_mds_request *req,
/* .snap dir? */
if (err == -ENOENT &&
ceph_snap(parent) == CEPH_NOSNAP &&
- strcmp(dentry->d_name.name,
- fsc->mount_options->snapdir_name) == 0) {
+ strcmp(dentry->d_name.name, fsc->mount_options->snapdir_name) == 0) {
+ struct dentry *res;
struct inode *inode = ceph_get_snapdir(parent);
- dout("ENOENT on snapdir %p '%pd', linking to snapdir %p\n",
- dentry, dentry, inode);
- BUG_ON(!d_unhashed(dentry));
- d_add(dentry, inode);
- err = 0;
+
+ res = d_splice_alias(inode, dentry);
+ dout("ENOENT on snapdir %p '%pd', linking to snapdir %p. Spliced dentry %p\n",
+ dentry, dentry, inode, res);
+ if (res)
+ dentry = res;
}
- return err;
+ return dentry;
}
/*
@@ -739,6 +742,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_mds_request *req;
+ struct dentry *res;
int op;
int mask;
int err;
@@ -789,7 +793,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc, NULL, req);
- err = ceph_handle_snapdir(req, dentry, err);
+ res = ceph_handle_snapdir(req, dentry, err);
+ if (IS_ERR(res)) {
+ err = PTR_ERR(res);
+ } else {
+ dentry = res;
+ err = 0;
+ }
dentry = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req); /* will dput(dentry) */
dout("lookup result=%p\n", dentry);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e088843a7734..65540a4429b2 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -129,6 +129,10 @@ static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
vino.ino = ino;
vino.snap = CEPH_NOSNAP;
+
+ if (ceph_vino_is_reserved(vino))
+ return ERR_PTR(-ESTALE);
+
inode = ceph_find_inode(sb, vino);
if (!inode) {
struct ceph_mds_request *req;
@@ -178,8 +182,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
return ERR_CAST(inode);
/* We need LINK caps to reliably check i_nlink */
err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
- if (err)
+ if (err) {
+ iput(inode);
return ERR_PTR(err);
+ }
/* -ESTALE if inode as been unlinked and no file is open */
if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
iput(inode);
@@ -212,6 +218,10 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb,
vino.ino = sfh->ino;
vino.snap = sfh->snapid;
}
+
+ if (ceph_vino_is_reserved(vino))
+ return ERR_PTR(-ESTALE);
+
inode = ceph_find_inode(sb, vino);
if (inode)
return d_obtain_alias(inode);
@@ -248,9 +258,10 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb,
ihold(inode);
} else {
/* mds does not support lookup snapped inode */
- err = -EOPNOTSUPP;
- inode = NULL;
+ inode = ERR_PTR(-EOPNOTSUPP);
}
+ } else {
+ inode = ERR_PTR(-ESTALE);
}
ceph_mdsc_put_request(req);
@@ -261,8 +272,8 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb,
dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
}
- if (!inode)
- return ERR_PTR(-ESTALE);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
/* see comments in ceph_get_parent() */
return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 209535d5b8d3..77fc037d5beb 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -739,9 +739,12 @@ retry:
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
- err = ceph_handle_snapdir(req, dentry, err);
- if (err)
+ dentry = ceph_handle_snapdir(req, dentry, err);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
goto out_req;
+ }
+ err = 0;
if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
@@ -892,7 +895,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
if (!ret)
ret = ceph_osdc_wait_request(osdc, req);
- ceph_update_read_latency(&fsc->mdsc->metric,
+ ceph_update_read_metrics(&fsc->mdsc->metric,
req->r_start_latency,
req->r_end_latency,
ret);
@@ -1034,16 +1037,6 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
dout("ceph_aio_complete_req %p rc %d bytes %u\n",
inode, rc, osd_data->bvec_pos.iter.bi_size);
- /* r_start_latency == 0 means the request was not submitted */
- if (req->r_start_latency) {
- if (aio_req->write)
- ceph_update_write_latency(metric, req->r_start_latency,
- req->r_end_latency, rc);
- else
- ceph_update_read_latency(metric, req->r_start_latency,
- req->r_end_latency, rc);
- }
-
if (rc == -EOLDSNAPC) {
struct ceph_aio_work *aio_work;
BUG_ON(!aio_req->write);
@@ -1086,6 +1079,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
}
}
+ /* r_start_latency == 0 means the request was not submitted */
+ if (req->r_start_latency) {
+ if (aio_req->write)
+ ceph_update_write_metrics(metric, req->r_start_latency,
+ req->r_end_latency, rc);
+ else
+ ceph_update_read_metrics(metric, req->r_start_latency,
+ req->r_end_latency, rc);
+ }
+
put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs,
aio_req->should_dirty);
ceph_osdc_put_request(req);
@@ -1290,10 +1293,10 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
if (write)
- ceph_update_write_latency(metric, req->r_start_latency,
+ ceph_update_write_metrics(metric, req->r_start_latency,
req->r_end_latency, ret);
else
- ceph_update_read_latency(metric, req->r_start_latency,
+ ceph_update_read_metrics(metric, req->r_start_latency,
req->r_end_latency, ret);
size = i_size_read(inode);
@@ -1467,7 +1470,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
- ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+ ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, ret);
out:
ceph_osdc_put_request(req);
@@ -1510,7 +1513,6 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
size_t len = iov_iter_count(to);
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct page *pinned_page = NULL;
bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
ssize_t ret;
int want, got = 0;
@@ -1529,8 +1531,7 @@ again:
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
- ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
- &got, &pinned_page);
+ ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0) {
if (iocb->ki_flags & IOCB_DIRECT)
ceph_end_io_direct(inode);
@@ -1571,10 +1572,6 @@ again:
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
- if (pinned_page) {
- put_page(pinned_page);
- pinned_page = NULL;
- }
ceph_put_cap_refs(ci, got);
if (direct_lock)
@@ -1753,8 +1750,7 @@ retry_snap:
else
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count,
- &got, NULL);
+ err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
if (err < 0)
goto out;
@@ -2083,7 +2079,7 @@ static long ceph_fallocate(struct file *file, int mode,
else
want = CEPH_CAP_FILE_BUFFER;
- ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
+ ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got);
if (ret < 0)
goto unlock;
@@ -2121,7 +2117,7 @@ static int get_rd_wr_caps(struct file *src_filp, int *src_got,
retry_caps:
ret = ceph_get_caps(dst_filp, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
- dst_endoff, dst_got, NULL);
+ dst_endoff, dst_got);
if (ret < 0)
return ret;
@@ -2143,7 +2139,7 @@ retry_caps:
return ret;
}
ret = ceph_get_caps(src_filp, CEPH_CAP_FILE_RD,
- CEPH_CAP_FILE_SHARED, -1, src_got, NULL);
+ CEPH_CAP_FILE_SHARED, -1, src_got);
if (ret < 0)
return ret;
/*... drop src_ci caps too, and retry */
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 156f849f5385..e1c63adb196d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -56,6 +56,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
{
struct inode *inode;
+ if (ceph_vino_is_reserved(vino))
+ return ERR_PTR(-EREMOTEIO);
+
inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare,
ceph_set_ino_cb, &vino);
if (!inode)
@@ -78,23 +81,36 @@ struct inode *ceph_get_snapdir(struct inode *parent)
struct inode *inode = ceph_get_inode(parent->i_sb, vino);
struct ceph_inode_info *ci = ceph_inode(inode);
- BUG_ON(!S_ISDIR(parent->i_mode));
if (IS_ERR(inode))
return inode;
+
+ if (!S_ISDIR(parent->i_mode)) {
+ pr_warn_once("bad snapdir parent type (mode=0%o)\n",
+ parent->i_mode);
+ return ERR_PTR(-ENOTDIR);
+ }
+
+ if (!(inode->i_state & I_NEW) && !S_ISDIR(inode->i_mode)) {
+ pr_warn_once("bad snapdir inode type (mode=0%o)\n",
+ inode->i_mode);
+ return ERR_PTR(-ENOTDIR);
+ }
+
inode->i_mode = parent->i_mode;
inode->i_uid = parent->i_uid;
inode->i_gid = parent->i_gid;
inode->i_mtime = parent->i_mtime;
inode->i_ctime = parent->i_ctime;
inode->i_atime = parent->i_atime;
- inode->i_op = &ceph_snapdir_iops;
- inode->i_fop = &ceph_snapdir_fops;
- ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
ci->i_rbytes = 0;
ci->i_btime = ceph_inode(parent)->i_btime;
- if (inode->i_state & I_NEW)
+ if (inode->i_state & I_NEW) {
+ inode->i_op = &ceph_snapdir_iops;
+ inode->i_fop = &ceph_snapdir_fops;
+ ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
unlock_new_inode(inode);
+ }
return inode;
}
@@ -616,10 +632,11 @@ int ceph_fill_file_size(struct inode *inode, int issued,
{
struct ceph_inode_info *ci = ceph_inode(inode);
int queue_trunc = 0;
+ loff_t isize = i_size_read(inode);
if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
- (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
- dout("size %lld -> %llu\n", inode->i_size, size);
+ (truncate_seq == ci->i_truncate_seq && size > isize)) {
+ dout("size %lld -> %llu\n", isize, size);
if (size > 0 && S_ISDIR(inode->i_mode)) {
pr_err("fill_file_size non-zero size for directory\n");
size = 0;
@@ -757,11 +774,32 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
bool queue_trunc = false;
bool new_version = false;
bool fill_inline = false;
+ umode_t mode = le32_to_cpu(info->mode);
+ dev_t rdev = le32_to_cpu(info->rdev);
dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__,
inode, ceph_vinop(inode), le64_to_cpu(info->version),
ci->i_version);
+ /* Once I_NEW is cleared, we can't change type or dev numbers */
+ if (inode->i_state & I_NEW) {
+ inode->i_mode = mode;
+ } else {
+ if (inode_wrong_type(inode, mode)) {
+ pr_warn_once("inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n",
+ ceph_vinop(inode), inode->i_mode, mode);
+ return -ESTALE;
+ }
+
+ if ((S_ISCHR(mode) || S_ISBLK(mode)) && inode->i_rdev != rdev) {
+ pr_warn_once("dev inode rdev changed! (ino %llx.%llx is %u:%u, mds says %u:%u)\n",
+ ceph_vinop(inode), MAJOR(inode->i_rdev),
+ MINOR(inode->i_rdev), MAJOR(rdev),
+ MINOR(rdev));
+ return -ESTALE;
+ }
+ }
+
info_caps = le32_to_cpu(info->cap.caps);
/* prealloc new cap struct */
@@ -815,8 +853,6 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
issued |= __ceph_caps_dirty(ci);
new_issued = ~issued & info_caps;
- /* update inode */
- inode->i_rdev = le32_to_cpu(info->rdev);
/* directories have fl_stripe_unit set to zero */
if (le32_to_cpu(info->layout.fl_stripe_unit))
inode->i_blkbits =
@@ -828,7 +864,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
(issued & CEPH_CAP_AUTH_EXCL) == 0) {
- inode->i_mode = le32_to_cpu(info->mode);
+ inode->i_mode = mode;
inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
@@ -894,6 +930,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ci->i_dir_pin = iinfo->dir_pin;
+ ci->i_rsnaps = iinfo->rsnaps;
ceph_decode_timespec64(&ci->i_rctime, &info->rctime);
}
}
@@ -926,7 +963,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
case S_IFCHR:
case S_IFSOCK:
inode->i_blkbits = PAGE_SHIFT;
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
+ init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &ceph_file_iops;
break;
case S_IFREG:
@@ -1787,7 +1824,7 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size)
bool ret;
spin_lock(&ci->i_ceph_lock);
- dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
+ dout("set_size %p %llu -> %llu\n", inode, i_size_read(inode), size);
i_size_write(inode, size);
inode->i_blocks = calc_inode_blocks(size);
@@ -1863,6 +1900,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
orig_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
+ ceph_fscache_invalidate(inode);
if (invalidate_inode_pages2(inode->i_mapping) < 0) {
pr_err("invalidate_pages %p fails\n", inode);
}
@@ -2093,20 +2131,19 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
}
}
if (ia_valid & ATTR_SIZE) {
- dout("setattr %p size %lld -> %lld\n", inode,
- inode->i_size, attr->ia_size);
- if ((issued & CEPH_CAP_FILE_EXCL) &&
- attr->ia_size > inode->i_size) {
+ loff_t isize = i_size_read(inode);
+
+ dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
+ if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size > isize) {
i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size);
ci->i_reported_size = attr->ia_size;
dirtied |= CEPH_CAP_FILE_EXCL;
ia_valid |= ATTR_MTIME;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
- attr->ia_size != inode->i_size) {
+ attr->ia_size != isize) {
req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
- req->r_args.setattr.old_size =
- cpu_to_le64(inode->i_size);
+ req->r_args.setattr.old_size = cpu_to_le64(isize);
mask |= CEPH_SETATTR_SIZE;
release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
@@ -2216,7 +2253,7 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return err;
if ((attr->ia_valid & ATTR_SIZE) &&
- attr->ia_size > max(inode->i_size, fsc->max_file_size))
+ attr->ia_size > max(i_size_read(inode), fsc->max_file_size))
return -EFBIG;
if ((attr->ia_valid & ATTR_SIZE) &&
diff --git a/fs/ceph/io.c b/fs/ceph/io.c
index 97602ea92ff4..c456509b31c3 100644
--- a/fs/ceph/io.c
+++ b/fs/ceph/io.c
@@ -118,7 +118,7 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
}
/**
- * ceph_end_io_direct - declare the file is being used for direct i/o
+ * ceph_start_io_direct - declare the file is being used for direct i/o
* @inode: file inode
*
* Declare that a direct I/O operation is about to start, and ensure
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d87bd852ed96..e5af591d3bd4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -176,6 +176,13 @@ static int parse_reply_info_in(void **p, void *end,
memset(&info->snap_btime, 0, sizeof(info->snap_btime));
}
+ /* snapshot count, remains zero for v<=3 */
+ if (struct_v >= 4) {
+ ceph_decode_64_safe(p, end, info->rsnaps, bad);
+ } else {
+ info->rsnaps = 0;
+ }
+
*p = end;
} else {
if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
@@ -214,7 +221,7 @@ static int parse_reply_info_in(void **p, void *end,
}
info->dir_pin = -ENODATA;
- /* info->snap_btime remains zero */
+ /* info->snap_btime and info->rsnaps remain zero */
}
return 0;
bad:
@@ -433,6 +440,13 @@ static int ceph_parse_deleg_inos(void **p, void *end,
ceph_decode_64_safe(p, end, start, bad);
ceph_decode_64_safe(p, end, len, bad);
+
+ /* Don't accept a delegation of system inodes */
+ if (start < CEPH_INO_SYSTEM_BASE) {
+ pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n",
+ start, len);
+ continue;
+ }
while (len--) {
int err = xa_insert(&s->s_delegated_inos, ino = start++,
DELEGATED_INO_AVAILABLE,
@@ -3306,7 +3320,7 @@ out_err:
/* kick calling process */
complete_request(mdsc, req);
- ceph_update_metadata_latency(&mdsc->metric, req->r_start_latency,
+ ceph_update_metadata_metrics(&mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
out:
ceph_mdsc_put_request(req);
@@ -3780,7 +3794,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v1.cap_id = cpu_to_le64(cap->cap_id);
rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v1.issued = cpu_to_le32(cap->issued);
- rec.v1.size = cpu_to_le64(inode->i_size);
+ rec.v1.size = cpu_to_le64(i_size_read(inode));
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index eaa7c5422116..15c11a0f2caf 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -88,6 +88,7 @@ struct ceph_mds_reply_info_in {
s32 dir_pin;
struct ceph_timespec btime;
struct ceph_timespec snap_btime;
+ u64 rsnaps;
u64 change_attr;
};
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index 5ec94bd4c1de..28b6b42ad677 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -17,6 +17,9 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta;
struct ceph_metric_dlease *dlease;
+ struct ceph_opened_files *files;
+ struct ceph_pinned_icaps *icaps;
+ struct ceph_opened_inodes *inodes;
struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg;
@@ -26,7 +29,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
- + sizeof(*meta) + sizeof(*dlease);
+ + sizeof(*meta) + sizeof(*dlease) + sizeof(*files)
+ + sizeof(*icaps) + sizeof(*inodes);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) {
@@ -95,6 +99,38 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
items++;
+ sum = percpu_counter_sum(&m->total_inodes);
+
+ /* encode the opened files metric */
+ files = (struct ceph_opened_files *)(dlease + 1);
+ files->type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
+ files->ver = 1;
+ files->compat = 1;
+ files->data_len = cpu_to_le32(sizeof(*files) - 10);
+ files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
+ files->total = cpu_to_le64(sum);
+ items++;
+
+ /* encode the pinned icaps metric */
+ icaps = (struct ceph_pinned_icaps *)(files + 1);
+ icaps->type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
+ icaps->ver = 1;
+ icaps->compat = 1;
+ icaps->data_len = cpu_to_le32(sizeof(*icaps) - 10);
+ icaps->pinned_icaps = cpu_to_le64(nr_caps);
+ icaps->total = cpu_to_le64(sum);
+ items++;
+
+ /* encode the opened inodes metric */
+ inodes = (struct ceph_opened_inodes *)(icaps + 1);
+ inodes->type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
+ inodes->ver = 1;
+ inodes->compat = 1;
+ inodes->data_len = cpu_to_le32(sizeof(*inodes) - 10);
+ inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
+ inodes->total = cpu_to_le64(sum);
+ items++;
+
put_unaligned_le32(items, &head->num);
msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1);
@@ -183,21 +219,21 @@ int ceph_metric_init(struct ceph_client_metric *m)
if (ret)
goto err_i_caps_mis;
- spin_lock_init(&m->read_latency_lock);
+ spin_lock_init(&m->read_metric_lock);
m->read_latency_sq_sum = 0;
m->read_latency_min = KTIME_MAX;
m->read_latency_max = 0;
m->total_reads = 0;
m->read_latency_sum = 0;
- spin_lock_init(&m->write_latency_lock);
+ spin_lock_init(&m->write_metric_lock);
m->write_latency_sq_sum = 0;
m->write_latency_min = KTIME_MAX;
m->write_latency_max = 0;
m->total_writes = 0;
m->write_latency_sum = 0;
- spin_lock_init(&m->metadata_latency_lock);
+ spin_lock_init(&m->metadata_metric_lock);
m->metadata_latency_sq_sum = 0;
m->metadata_latency_min = KTIME_MAX;
m->metadata_latency_max = 0;
@@ -274,7 +310,7 @@ static inline void __update_latency(ktime_t *totalp, ktime_t *lsump,
*sq_sump += sq;
}
-void ceph_update_read_latency(struct ceph_client_metric *m,
+void ceph_update_read_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
@@ -283,14 +319,14 @@ void ceph_update_read_latency(struct ceph_client_metric *m,
if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT))
return;
- spin_lock(&m->read_latency_lock);
+ spin_lock(&m->read_metric_lock);
__update_latency(&m->total_reads, &m->read_latency_sum,
&m->read_latency_min, &m->read_latency_max,
&m->read_latency_sq_sum, lat);
- spin_unlock(&m->read_latency_lock);
+ spin_unlock(&m->read_metric_lock);
}
-void ceph_update_write_latency(struct ceph_client_metric *m,
+void ceph_update_write_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
@@ -299,14 +335,14 @@ void ceph_update_write_latency(struct ceph_client_metric *m,
if (unlikely(rc && rc != -ETIMEDOUT))
return;
- spin_lock(&m->write_latency_lock);
+ spin_lock(&m->write_metric_lock);
__update_latency(&m->total_writes, &m->write_latency_sum,
&m->write_latency_min, &m->write_latency_max,
&m->write_latency_sq_sum, lat);
- spin_unlock(&m->write_latency_lock);
+ spin_unlock(&m->write_metric_lock);
}
-void ceph_update_metadata_latency(struct ceph_client_metric *m,
+void ceph_update_metadata_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
@@ -315,9 +351,9 @@ void ceph_update_metadata_latency(struct ceph_client_metric *m,
if (unlikely(rc && rc != -ENOENT))
return;
- spin_lock(&m->metadata_latency_lock);
+ spin_lock(&m->metadata_metric_lock);
__update_latency(&m->total_metadatas, &m->metadata_latency_sum,
&m->metadata_latency_min, &m->metadata_latency_max,
&m->metadata_latency_sq_sum, lat);
- spin_unlock(&m->metadata_latency_lock);
+ spin_unlock(&m->metadata_metric_lock);
}
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index af6038ff39d4..e984eb2bb14b 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -14,8 +14,11 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_WRITE_LATENCY,
CLIENT_METRIC_TYPE_METADATA_LATENCY,
CLIENT_METRIC_TYPE_DENTRY_LEASE,
+ CLIENT_METRIC_TYPE_OPENED_FILES,
+ CLIENT_METRIC_TYPE_PINNED_ICAPS,
+ CLIENT_METRIC_TYPE_OPENED_INODES,
- CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
+ CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_OPENED_INODES,
};
/*
@@ -28,6 +31,9 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \
CLIENT_METRIC_TYPE_DENTRY_LEASE, \
+ CLIENT_METRIC_TYPE_OPENED_FILES, \
+ CLIENT_METRIC_TYPE_PINNED_ICAPS, \
+ CLIENT_METRIC_TYPE_OPENED_INODES, \
\
CLIENT_METRIC_TYPE_MAX, \
}
@@ -94,6 +100,42 @@ struct ceph_metric_dlease {
__le64 total;
} __packed;
+/* metric opened files header */
+struct ceph_opened_files {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(opened_files + total) */
+ __le64 opened_files;
+ __le64 total;
+} __packed;
+
+/* metric pinned i_caps header */
+struct ceph_pinned_icaps {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(pinned_icaps + total) */
+ __le64 pinned_icaps;
+ __le64 total;
+} __packed;
+
+/* metric opened inodes header */
+struct ceph_opened_inodes {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(opened_inodes + total) */
+ __le64 opened_inodes;
+ __le64 total;
+} __packed;
+
struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */
} __packed;
@@ -108,21 +150,21 @@ struct ceph_client_metric {
struct percpu_counter i_caps_hit;
struct percpu_counter i_caps_mis;
- spinlock_t read_latency_lock;
+ spinlock_t read_metric_lock;
u64 total_reads;
ktime_t read_latency_sum;
ktime_t read_latency_sq_sum;
ktime_t read_latency_min;
ktime_t read_latency_max;
- spinlock_t write_latency_lock;
+ spinlock_t write_metric_lock;
u64 total_writes;
ktime_t write_latency_sum;
ktime_t write_latency_sq_sum;
ktime_t write_latency_min;
ktime_t write_latency_max;
- spinlock_t metadata_latency_lock;
+ spinlock_t metadata_metric_lock;
u64 total_metadatas;
ktime_t metadata_latency_sum;
ktime_t metadata_latency_sq_sum;
@@ -162,13 +204,13 @@ static inline void ceph_update_cap_mis(struct ceph_client_metric *m)
percpu_counter_inc(&m->i_caps_mis);
}
-extern void ceph_update_read_latency(struct ceph_client_metric *m,
+extern void ceph_update_read_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
-extern void ceph_update_write_latency(struct ceph_client_metric *m,
+extern void ceph_update_write_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
-extern void ceph_update_metadata_latency(struct ceph_client_metric *m,
+extern void ceph_update_metadata_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
#endif /* _FS_CEPH_MDS_METRIC_H */
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 0728b01d4d43..4ce18055d931 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -605,7 +605,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
BUG_ON(capsnap->writing);
- capsnap->size = inode->i_size;
+ capsnap->size = i_size_read(inode);
capsnap->mtime = inode->i_mtime;
capsnap->atime = inode->i_atime;
capsnap->ctime = inode->i_ctime;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c48bb30c8d70..db80d89556b1 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -21,6 +21,7 @@
#include <linux/ceph/libceph.h>
#ifdef CONFIG_CEPH_FSCACHE
+#define FSCACHE_USE_NEW_IO_API
#include <linux/fscache.h>
#endif
@@ -333,7 +334,7 @@ struct ceph_inode_info {
/* for dirs */
struct timespec64 i_rctime;
- u64 i_rbytes, i_rfiles, i_rsubdirs;
+ u64 i_rbytes, i_rfiles, i_rsubdirs, i_rsnaps;
u64 i_files, i_subdirs;
/* quotas */
@@ -427,7 +428,6 @@ struct ceph_inode_info {
#ifdef CONFIG_CEPH_FSCACHE
struct fscache_cookie *fscache;
- u32 i_fscache_gen;
#endif
errseq_t i_meta_err;
@@ -529,10 +529,34 @@ static inline int ceph_ino_compare(struct inode *inode, void *data)
ci->i_vino.snap == pvino->snap;
}
+/*
+ * The MDS reserves a set of inodes for its own usage. These should never
+ * be accessible by clients, and so the MDS has no reason to ever hand these
+ * out. The range is CEPH_MDS_INO_MDSDIR_OFFSET..CEPH_INO_SYSTEM_BASE.
+ *
+ * These come from src/mds/mdstypes.h in the ceph sources.
+ */
+#define CEPH_MAX_MDS 0x100
+#define CEPH_NUM_STRAY 10
+#define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS)
+#define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
+
+static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
+{
+ if (vino.ino < CEPH_INO_SYSTEM_BASE &&
+ vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) {
+ WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino);
+ return true;
+ }
+ return false;
+}
static inline struct inode *ceph_find_inode(struct super_block *sb,
struct ceph_vino vino)
{
+ if (ceph_vino_is_reserved(vino))
+ return NULL;
+
/*
* NB: The hashval will be run through the fs/inode.c hash function
* anyway, so there is no need to squash the inode number down to
@@ -1156,7 +1180,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
int mds, int drop, int unless);
extern int ceph_get_caps(struct file *filp, int need, int want,
- loff_t endoff, int *got, struct page **pinned_page);
+ loff_t endoff, int *got);
extern int ceph_try_get_caps(struct inode *inode,
int need, int want, bool nonblock, int *got);
@@ -1193,7 +1217,7 @@ extern const struct dentry_operations ceph_dentry_ops;
extern loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order);
extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
-extern int ceph_handle_snapdir(struct ceph_mds_request *req,
+extern struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
struct dentry *dentry, int err);
extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 02f59bcb4f27..1242db8d3444 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -233,6 +233,12 @@ static ssize_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
return ceph_fmt_xattr(val, size, "%lld", ci->i_rsubdirs);
}
+static ssize_t ceph_vxattrcb_dir_rsnaps(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return ceph_fmt_xattr(val, size, "%lld", ci->i_rsnaps);
+}
+
static ssize_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
size_t size)
{
@@ -384,6 +390,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs),
+ XATTR_RSTAT_FIELD(dir, rsnaps),
XATTR_RSTAT_FIELD(dir, rbytes),
XATTR_RSTAT_FIELD(dir, rctime),
{
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 88a7958170ee..68e8e5b27841 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -17,15 +17,14 @@
#include "cifsproto.h"
#include "cifs_debug.h"
#include "cifsfs.h"
+#include "fs_context.h"
#ifdef CONFIG_CIFS_DFS_UPCALL
#include "dfs_cache.h"
#endif
#ifdef CONFIG_CIFS_SMB_DIRECT
#include "smbdirect.h"
#endif
-#ifdef CONFIG_CIFS_SWN_UPCALL
#include "cifs_swn.h"
-#endif
void
cifs_dump_mem(char *label, void *data, int length)
@@ -118,10 +117,8 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
seq_printf(m, " POSIX Extensions");
if (tcon->ses->server->ops->dump_share_caps)
tcon->ses->server->ops->dump_share_caps(m, tcon);
-#ifdef CONFIG_CIFS_SWN_UPCALL
if (tcon->use_witness)
seq_puts(m, " Witness");
-#endif
if (tcon->need_reconnect)
seq_puts(m, "\tDISCONNECTED ");
@@ -490,10 +487,8 @@ skip_rdma:
spin_unlock(&cifs_tcp_ses_lock);
seq_putc(m, '\n');
-
-#ifdef CONFIG_CIFS_SWN_UPCALL
cifs_swn_dump(m);
-#endif
+
/* BB add code to dump additional info such as TCP session info now */
return 0;
}
@@ -702,6 +697,7 @@ static const struct proc_ops cifs_lookup_cache_proc_ops;
static const struct proc_ops traceSMB_proc_ops;
static const struct proc_ops cifs_security_flags_proc_ops;
static const struct proc_ops cifs_linux_ext_proc_ops;
+static const struct proc_ops cifs_mount_params_proc_ops;
void
cifs_proc_init(void)
@@ -726,6 +722,8 @@ cifs_proc_init(void)
proc_create("LookupCacheEnabled", 0644, proc_fs_cifs,
&cifs_lookup_cache_proc_ops);
+ proc_create("mount_params", 0444, proc_fs_cifs, &cifs_mount_params_proc_ops);
+
#ifdef CONFIG_CIFS_DFS_UPCALL
proc_create("dfscache", 0644, proc_fs_cifs, &dfscache_proc_ops);
#endif
@@ -764,6 +762,7 @@ cifs_proc_clean(void)
remove_proc_entry("SecurityFlags", proc_fs_cifs);
remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
+ remove_proc_entry("mount_params", proc_fs_cifs);
#ifdef CONFIG_CIFS_DFS_UPCALL
remove_proc_entry("dfscache", proc_fs_cifs);
@@ -1023,6 +1022,51 @@ static const struct proc_ops cifs_security_flags_proc_ops = {
.proc_release = single_release,
.proc_write = cifs_security_flags_proc_write,
};
+
+/* To make it easier to debug, can help to show mount params */
+static int cifs_mount_params_proc_show(struct seq_file *m, void *v)
+{
+ const struct fs_parameter_spec *p;
+ const char *type;
+
+ for (p = smb3_fs_parameters; p->name; p++) {
+ /* cannot use switch with pointers... */
+ if (!p->type) {
+ if (p->flags == fs_param_neg_with_no)
+ type = "noflag";
+ else
+ type = "flag";
+ } else if (p->type == fs_param_is_bool)
+ type = "bool";
+ else if (p->type == fs_param_is_u32)
+ type = "u32";
+ else if (p->type == fs_param_is_u64)
+ type = "u64";
+ else if (p->type == fs_param_is_string)
+ type = "string";
+ else
+ type = "unknown";
+
+ seq_printf(m, "%s:%s\n", p->name, type);
+ }
+
+ return 0;
+}
+
+static int cifs_mount_params_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, cifs_mount_params_proc_show, NULL);
+}
+
+static const struct proc_ops cifs_mount_params_proc_ops = {
+ .proc_open = cifs_mount_params_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+ /* No need for write for now */
+ /* .proc_write = cifs_mount_params_proc_write, */
+};
+
#else
inline void cifs_proc_init(void)
{
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 6b1ce4efb591..c87c37cf2914 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -270,7 +270,7 @@ static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt,
char *mountdata;
char *devname;
- devname = kstrndup(fullpath, strlen(fullpath), GFP_KERNEL);
+ devname = kstrdup(fullpath, GFP_KERNEL);
if (!devname)
return ERR_PTR(-ENOMEM);
@@ -302,6 +302,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
struct cifs_sb_info *cifs_sb;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
+ void *page;
char *full_path, *root_path;
unsigned int xid;
int rc;
@@ -324,10 +325,13 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
goto cdda_exit;
}
+ page = alloc_dentry_path();
/* always use tree name prefix */
- full_path = build_path_from_dentry_optional_prefix(mntpt, true);
- if (full_path == NULL)
- goto cdda_exit;
+ full_path = build_path_from_dentry_optional_prefix(mntpt, page, true);
+ if (IS_ERR(full_path)) {
+ mnt = ERR_CAST(full_path);
+ goto free_full_path;
+ }
convert_delimiter(full_path, '\\');
@@ -385,7 +389,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
free_root_path:
kfree(root_path);
free_full_path:
- kfree(full_path);
+ free_dentry_path(page);
cdda_exit:
cifs_dbg(FYI, "leaving %s\n" , __func__);
return mnt;
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index aa77edc12212..9c45b3a82ad9 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -55,6 +55,7 @@
#define CIFS_MOUNT_MODE_FROM_SID 0x10000000 /* retrieve mode from special ACE */
#define CIFS_MOUNT_RO_CACHE 0x20000000 /* assumes share will not change */
#define CIFS_MOUNT_RW_CACHE 0x40000000 /* assumes only client accessing */
+#define CIFS_MOUNT_SHUTDOWN 0x80000000
struct cifs_sb_info {
struct rb_root tlink_tree;
@@ -81,5 +82,9 @@ struct cifs_sb_info {
* (cifs_autodisable_serverino) in order to match new mounts.
*/
bool mnt_cifs_serverino_autodisabled;
+ /*
+ * Available once the mount has completed.
+ */
+ struct dentry *root;
};
#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index 153d5c842a9b..37fc7d6ac457 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -57,6 +57,12 @@ struct smb_query_info {
/* char buffer[]; */
} __packed;
+/*
+ * Dumping the commonly used 16 byte (e.g. CCM and GCM128) keys still supported
+ * for backlevel compatibility, but is not sufficient for dumping the less
+ * frequently used GCM256 (32 byte) keys (see the newer "CIFS_DUMP_FULL_KEY"
+ * ioctl for dumping decryption info for GCM256 mounts)
+ */
struct smb3_key_debug_info {
__u64 Suid;
__u16 cipher_type;
@@ -65,6 +71,31 @@ struct smb3_key_debug_info {
__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
} __packed;
+/*
+ * Dump variable-sized keys
+ */
+struct smb3_full_key_debug_info {
+ /* INPUT: size of userspace buffer */
+ __u32 in_size;
+
+ /*
+ * INPUT: 0 for current user, otherwise session to dump
+ * OUTPUT: session id that was dumped
+ */
+ __u64 session_id;
+ __u16 cipher_type;
+ __u8 session_key_length;
+ __u8 server_in_key_length;
+ __u8 server_out_key_length;
+ __u8 data[];
+ /*
+ * return this struct with the keys appended at the end:
+ * __u8 session_key[session_key_length];
+ * __u8 server_in_key[server_in_key_length];
+ * __u8 server_out_key[server_out_key_length];
+ */
+} __packed;
+
struct smb3_notify {
__u32 completion_filter;
bool watch_tree;
@@ -78,3 +109,20 @@ struct smb3_notify {
#define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info)
#define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info)
#define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify)
+#define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info)
+#define CIFS_IOC_SHUTDOWN _IOR ('X', 125, __u32)
+
+/*
+ * Flags for going down operation
+ */
+#define CIFS_GOING_FLAGS_DEFAULT 0x0 /* going down */
+#define CIFS_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
+#define CIFS_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
+
+static inline bool cifs_forced_shutdown(struct cifs_sb_info *sbi)
+{
+ if (CIFS_MOUNT_SHUTDOWN & sbi->mnt_cifs_flags)
+ return true;
+ else
+ return false;
+}
diff --git a/fs/cifs/cifs_swn.h b/fs/cifs/cifs_swn.h
index 236ecd4959d5..8a9d2a5c9077 100644
--- a/fs/cifs/cifs_swn.h
+++ b/fs/cifs/cifs_swn.h
@@ -7,11 +7,13 @@
#ifndef _CIFS_SWN_H
#define _CIFS_SWN_H
+#include "cifsglob.h"
struct cifs_tcon;
struct sk_buff;
struct genl_info;
+#ifdef CONFIG_CIFS_SWN_UPCALL
extern int cifs_swn_register(struct cifs_tcon *tcon);
extern int cifs_swn_unregister(struct cifs_tcon *tcon);
@@ -22,4 +24,29 @@ extern void cifs_swn_dump(struct seq_file *m);
extern void cifs_swn_check(void);
+static inline bool cifs_swn_set_server_dstaddr(struct TCP_Server_Info *server)
+{
+ if (server->use_swn_dstaddr) {
+ server->dstaddr = server->swn_dstaddr;
+ return true;
+ }
+ return false;
+}
+
+static inline void cifs_swn_reset_server_dstaddr(struct TCP_Server_Info *server)
+{
+ server->use_swn_dstaddr = false;
+}
+
+#else
+
+static inline int cifs_swn_register(struct cifs_tcon *tcon) { return 0; }
+static inline int cifs_swn_unregister(struct cifs_tcon *tcon) { return 0; }
+static inline int cifs_swn_notify(struct sk_buff *s, struct genl_info *i) { return 0; }
+static inline void cifs_swn_dump(struct seq_file *m) {}
+static inline void cifs_swn_check(void) {}
+static inline bool cifs_swn_set_server_dstaddr(struct TCP_Server_Info *server) { return false; }
+static inline void cifs_swn_reset_server_dstaddr(struct TCP_Server_Info *server) {}
+
+#endif /* CONFIG_CIFS_SWN_UPCALL */
#endif /* _CIFS_SWN_H */
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index d178cf85e926..784407f9280f 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1094,11 +1094,9 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
struct cifs_ace *pnntace = NULL;
char *nacl_base = NULL;
u32 num_aces = 0;
- __u64 nmode;
bool new_aces_set = false;
/* Assuming that pndacl and pnmode are never NULL */
- nmode = *pnmode;
nacl_base = (char *)pndacl;
nsize = sizeof(struct cifs_acl);
@@ -1651,7 +1649,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
* Add three ACEs for owner, group, everyone getting rid of other ACEs
* as chmod disables ACEs and set the security descriptor. Allocate
* memory for the smb header, set security descriptor request security
- * descriptor parameters, and secuirty descriptor itself
+ * descriptor parameters, and security descriptor itself
*/
nsecdesclen = max_t(u32, nsecdesclen, DEFAULT_SEC_DESC_LEN);
pnntsd = kmalloc(nsecdesclen, GFP_KERNEL);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5ddd20b62484..2ffcb29d5c8f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -75,7 +75,7 @@ bool enable_oplocks = true;
bool linuxExtEnabled = true;
bool lookupCacheEnabled = true;
bool disable_legacy_dialects; /* false by default */
-bool enable_gcm_256; /* false by default, change when more servers support it */
+bool enable_gcm_256 = true;
bool require_gcm_256; /* false by default */
unsigned int global_secflags = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
@@ -133,6 +133,7 @@ struct workqueue_struct *cifsiod_wq;
struct workqueue_struct *decrypt_wq;
struct workqueue_struct *fileinfo_put_wq;
struct workqueue_struct *cifsoplockd_wq;
+struct workqueue_struct *deferredclose_wq;
__u32 cifs_lock_secret;
/*
@@ -217,8 +218,11 @@ cifs_read_super(struct super_block *sb)
rc = super_setup_bdi(sb);
if (rc)
goto out_no_root;
- /* tune readahead according to rsize */
- sb->s_bdi->ra_pages = cifs_sb->ctx->rsize / PAGE_SIZE;
+ /* tune readahead according to rsize if readahead size not set on mount */
+ if (cifs_sb->ctx->rasize)
+ sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE;
+ else
+ sb->s_bdi->ra_pages = cifs_sb->ctx->rsize / PAGE_SIZE;
sb->s_blocksize = CIFS_MAX_MSGSIZE;
sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
@@ -257,6 +261,29 @@ out_no_root:
static void cifs_kill_sb(struct super_block *sb)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifs_tcon *tcon;
+ struct cached_fid *cfid;
+
+ /*
+ * We ned to release all dentries for the cached directories
+ * before we kill the sb.
+ */
+ if (cifs_sb->root) {
+ dput(cifs_sb->root);
+ cifs_sb->root = NULL;
+ }
+ tcon = cifs_sb_master_tcon(cifs_sb);
+ if (tcon) {
+ cfid = &tcon->crfid;
+ mutex_lock(&cfid->fid_mutex);
+ if (cfid->dentry) {
+
+ dput(cfid->dentry);
+ cfid->dentry = NULL;
+ }
+ mutex_unlock(&cfid->fid_mutex);
+ }
+
kill_anon_super(sb);
cifs_umount(cifs_sb);
}
@@ -364,6 +391,8 @@ cifs_alloc_inode(struct super_block *sb)
/* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */
INIT_LIST_HEAD(&cifs_inode->openFileList);
INIT_LIST_HEAD(&cifs_inode->llist);
+ INIT_LIST_HEAD(&cifs_inode->deferred_closes);
+ spin_lock_init(&cifs_inode->deferred_lock);
return &cifs_inode->vfs_inode;
}
@@ -626,6 +655,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize);
seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize);
seq_printf(s, ",bsize=%u", cifs_sb->ctx->bsize);
+ if (cifs_sb->ctx->rasize)
+ seq_printf(s, ",rasize=%u", cifs_sb->ctx->rasize);
if (tcon->ses->server->min_offload)
seq_printf(s, ",esize=%u", tcon->ses->server->min_offload);
seq_printf(s, ",echo_interval=%lu",
@@ -656,10 +687,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",multichannel,max_channels=%zu",
tcon->ses->chan_max);
-#ifdef CONFIG_CIFS_SWN_UPCALL
if (tcon->use_witness)
seq_puts(s, ",witness");
-#endif
return 0;
}
@@ -834,7 +863,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
goto out;
}
- rc = cifs_setup_volume_info(cifs_sb->ctx, NULL, old_ctx->UNC);
+ rc = cifs_setup_volume_info(cifs_sb->ctx, NULL, NULL);
if (rc) {
root = ERR_PTR(rc);
goto out;
@@ -888,6 +917,9 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
if (IS_ERR(root))
goto out_super;
+ if (cifs_sb)
+ cifs_sb->root = dget(root);
+
cifs_dbg(FYI, "dentry root is: %p\n", root);
return root;
@@ -1528,10 +1560,6 @@ init_cifs(void)
int rc = 0;
cifs_proc_init();
INIT_LIST_HEAD(&cifs_tcp_ses_list);
-#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
- INIT_LIST_HEAD(&GlobalDnotifyReqList);
- INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
-#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
/*
* Initialize Global counters
*/
@@ -1606,9 +1634,16 @@ init_cifs(void)
goto out_destroy_fileinfo_put_wq;
}
+ deferredclose_wq = alloc_workqueue("deferredclose",
+ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ if (!deferredclose_wq) {
+ rc = -ENOMEM;
+ goto out_destroy_cifsoplockd_wq;
+ }
+
rc = cifs_fscache_register();
if (rc)
- goto out_destroy_cifsoplockd_wq;
+ goto out_destroy_deferredclose_wq;
rc = cifs_init_inodecache();
if (rc)
@@ -1676,6 +1711,8 @@ out_destroy_inodecache:
cifs_destroy_inodecache();
out_unreg_fscache:
cifs_fscache_unregister();
+out_destroy_deferredclose_wq:
+ destroy_workqueue(deferredclose_wq);
out_destroy_cifsoplockd_wq:
destroy_workqueue(cifsoplockd_wq);
out_destroy_fileinfo_put_wq:
@@ -1710,6 +1747,7 @@ exit_cifs(void)
cifs_destroy_mids();
cifs_destroy_inodecache();
cifs_fscache_unregister();
+ destroy_workqueue(deferredclose_wq);
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
destroy_workqueue(fileinfo_put_wq);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 0d7ef150dbb2..6beddb108ba0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -165,5 +165,5 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.31"
+#define CIFS_VERSION "2.32"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ec824ab8c5ca..8488d7024462 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -495,7 +495,7 @@ struct smb_version_operations {
struct inode *inode,
struct dentry *dentry,
struct cifs_tcon *tcon,
- char *full_path,
+ const char *full_path,
umode_t mode,
dev_t device_number);
/* version specific fiemap implementation */
@@ -988,10 +988,12 @@ struct cached_fid {
bool is_valid:1; /* Do we have a useable root fid */
bool file_all_info_is_valid:1;
bool has_lease:1;
+ unsigned long time; /* jiffies of when lease was taken */
struct kref refcount;
struct cifs_fid *fid;
struct mutex fid_mutex;
struct cifs_tcon *tcon;
+ struct dentry *dentry;
struct work_struct lease_break;
struct smb2_file_all_info file_all_info;
};
@@ -1070,6 +1072,7 @@ struct cifs_tcon {
bool use_resilient:1; /* use resilient instead of durable handles */
bool use_persistent:1; /* use persistent instead of durable handles */
bool no_lease:1; /* Do not request leases on files or directories */
+ bool use_witness:1; /* use witness protocol */
__le32 capabilities;
__u32 share_flags;
__u32 maximal_access;
@@ -1094,9 +1097,6 @@ struct cifs_tcon {
int remap:2;
struct list_head ulist; /* cache update list */
#endif
-#ifdef CONFIG_CIFS_SWN_UPCALL
- bool use_witness:1; /* use witness protocol */
-#endif
};
/*
@@ -1154,6 +1154,14 @@ struct cifs_pending_open {
__u32 oplock;
};
+struct cifs_deferred_close {
+ struct list_head dlist;
+ struct tcon_link *tlink;
+ __u16 netfid;
+ __u64 persistent_fid;
+ __u64 volatile_fid;
+};
+
/*
* This info hangs off the cifsFileInfo structure, pointed to by llist.
* This is used to track byte stream locks on the file
@@ -1248,6 +1256,8 @@ struct cifsFileInfo {
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
struct work_struct put; /* work for the final part of _put */
+ struct delayed_work deferred;
+ bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
};
struct cifs_io_parms {
@@ -1316,8 +1326,6 @@ struct cifs_readdata {
struct page **pages;
};
-struct cifs_writedata;
-
/* asynchronous write support */
struct cifs_writedata {
struct kref refcount;
@@ -1394,6 +1402,7 @@ struct cifsInodeInfo {
#define CIFS_INO_DELETE_PENDING (3) /* delete pending on server */
#define CIFS_INO_INVALID_MAPPING (4) /* pagecache is invalid */
#define CIFS_INO_LOCK (5) /* lock bit for synchronization */
+#define CIFS_INO_MODIFIED_ATTR (6) /* Indicate change in mtime/ctime */
unsigned long flags;
spinlock_t writers_lock;
unsigned int writers; /* Number of writers on this inode */
@@ -1406,6 +1415,9 @@ struct cifsInodeInfo {
struct fscache_cookie *fscache;
#endif
struct inode vfs_inode;
+ struct list_head deferred_closes; /* list of deferred closes */
+ spinlock_t deferred_lock; /* protection on deferred list */
+ bool lease_granted; /* Flag to indicate whether lease or oplock is granted. */
};
static inline struct cifsInodeInfo *
@@ -1796,9 +1808,8 @@ require use of the stronger protocol */
*
* Semaphores
* ----------
- * sesSem operations on smb session
- * tconSem operations on tree connection
- * fh_sem file handle reconnection operations
+ * cifsInodeInfo->lock_sem protects:
+ * the list of locks held by the inode
*
****************************************************************************/
@@ -1829,13 +1840,6 @@ GLOBAL_EXTERN struct list_head cifs_tcp_ses_list;
*/
GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
-#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
-/* Outstanding dir notify requests */
-GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
-/* DirNotify response queue */
-GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
-#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
-
/*
* Global transaction id (XID) information
*/
@@ -1879,23 +1883,16 @@ extern unsigned int cifs_min_small; /* min size of small buf pool */
extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
extern bool disable_legacy_dialects; /* forbid vers=1.0 and vers=2.0 mounts */
-GLOBAL_EXTERN struct rb_root uidtree;
-GLOBAL_EXTERN struct rb_root gidtree;
-GLOBAL_EXTERN spinlock_t siduidlock;
-GLOBAL_EXTERN spinlock_t sidgidlock;
-GLOBAL_EXTERN struct rb_root siduidtree;
-GLOBAL_EXTERN struct rb_root sidgidtree;
-GLOBAL_EXTERN spinlock_t uidsidlock;
-GLOBAL_EXTERN spinlock_t gidsidlock;
-
void cifs_oplock_break(struct work_struct *work);
void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
+void smb2_deferred_work_close(struct work_struct *work);
extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
extern struct workqueue_struct *decrypt_wq;
extern struct workqueue_struct *fileinfo_put_wq;
extern struct workqueue_struct *cifsoplockd_wq;
+extern struct workqueue_struct *deferredclose_wq;
extern __u32 cifs_lock_secret;
extern mempool_t *cifs_mid_poolp;
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 9adc74bd9f8f..554d64fe171e 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -148,7 +148,8 @@
#define SMB3_SIGN_KEY_SIZE (16)
/*
- * Size of the smb3 encryption/decryption keys
+ * Size of the smb3 encryption/decryption key storage.
+ * This size is big enough to store any cipher key types.
*/
#define SMB3_ENC_DEC_KEY_SIZE (32)
@@ -1903,7 +1904,7 @@ typedef struct smb_com_transaction2_fnext_req {
__le16 InformationLevel;
__u32 ResumeKey;
__le16 SearchFlags;
- char ResumeFileName[1];
+ char ResumeFileName[];
} __attribute__((packed)) TRANSACTION2_FNEXT_REQ;
typedef struct smb_com_transaction2_fnext_rsp {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 75ce6f742b8d..d30cba44ba29 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -69,9 +69,20 @@ extern int init_cifs_idmap(void);
extern void exit_cifs_idmap(void);
extern int init_cifs_spnego(void);
extern void exit_cifs_spnego(void);
-extern char *build_path_from_dentry(struct dentry *);
+extern const char *build_path_from_dentry(struct dentry *, void *);
extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry,
- bool prefix);
+ void *page, bool prefix);
+static inline void *alloc_dentry_path(void)
+{
+ return __getname();
+}
+
+static inline void free_dentry_path(void *page)
+{
+ if (page)
+ __putname(page);
+}
+
extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx,
struct cifs_sb_info *cifs_sb,
struct cifs_tcon *tcon,
@@ -184,7 +195,7 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid,
struct file *file,
struct tcon_link *tlink,
__u32 oplock);
-extern int cifs_posix_open(char *full_path, struct inode **inode,
+extern int cifs_posix_open(const char *full_path, struct inode **inode,
struct super_block *sb, int mode,
unsigned int f_flags, __u32 *oplock, __u16 *netfid,
unsigned int xid);
@@ -194,7 +205,7 @@ extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
struct cifs_sb_info *cifs_sb);
extern void cifs_dir_info_to_fattr(struct cifs_fattr *, FILE_DIRECTORY_INFO *,
struct cifs_sb_info *);
-extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
+extern int cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
extern struct inode *cifs_iget(struct super_block *sb,
struct cifs_fattr *fattr);
@@ -207,7 +218,7 @@ extern int cifs_get_inode_info_unix(struct inode **pinode,
const unsigned char *search_path,
struct super_block *sb, unsigned int xid);
extern int cifs_set_file_info(struct inode *inode, struct iattr *attrs,
- unsigned int xid, char *full_path, __u32 dosattr);
+ unsigned int xid, const char *full_path, __u32 dosattr);
extern int cifs_rename_pending_delete(const char *full_path,
struct dentry *dentry,
const unsigned int xid);
@@ -256,6 +267,19 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
struct tcon_link *tlink,
struct cifs_pending_open *open);
extern void cifs_del_pending_open(struct cifs_pending_open *open);
+
+extern bool cifs_is_deferred_close(struct cifsFileInfo *cfile,
+ struct cifs_deferred_close **dclose);
+
+extern void cifs_add_deferred_close(struct cifsFileInfo *cfile,
+ struct cifs_deferred_close *dclose);
+
+extern void cifs_del_deferred_close(struct cifsFileInfo *cfile);
+
+extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode);
+
+extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon);
+
extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
int from_reconnect);
@@ -358,11 +382,6 @@ extern int CIFSSMBSetFileDisposition(const unsigned int xid,
struct cifs_tcon *tcon,
bool delete_file, __u16 fid,
__u32 pid_of_opener);
-#if 0
-extern int CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon,
- char *fileName, __u16 dos_attributes,
- const struct nls_table *nls_codepage);
-#endif /* possibly unneeded function */
extern int CIFSSMBSetEOF(const unsigned int xid, struct cifs_tcon *tcon,
const char *file_name, __u64 size,
struct cifs_sb_info *cifs_sb, bool set_allocation);
@@ -504,12 +523,6 @@ extern int generate_smb311signingkey(struct cifs_ses *);
extern int calc_lanman_hash(const char *password, const char *cryptkey,
bool encrypt, char *lnm_session_key);
#endif /* CIFS_WEAK_PW_HASH */
-#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
-extern int CIFSSMBNotify(const unsigned int xid, struct cifs_tcon *tcon,
- const int notify_subdirs, const __u16 netfid,
- __u32 filter, struct file *file, int multishot,
- const struct nls_table *nls_codepage);
-#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
extern int CIFSSMBCopy(unsigned int xid,
struct cifs_tcon *source_tcon,
const char *fromName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c279527aae92..41f74163cc1c 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -114,7 +114,7 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
mutex_lock(&tcon->crfid.fid_mutex);
tcon->crfid.is_valid = false;
/* cached handle is not valid, so SMB2_CLOSE won't be sent below */
- close_shroot_lease_locked(&tcon->crfid);
+ close_cached_dir_lease_locked(&tcon->crfid);
memset(tcon->crfid.fid, 0, sizeof(struct cifs_fid));
mutex_unlock(&tcon->crfid.fid_mutex);
@@ -5917,56 +5917,6 @@ SetTimesRetry:
return rc;
}
-/* Can not be used to set time stamps yet (due to old DOS time format) */
-/* Can be used to set attributes */
-#if 0 /* Possibly not needed - since it turns out that strangely NT4 has a bug
- handling it anyway and NT4 was what we thought it would be needed for
- Do not delete it until we prove whether needed for Win9x though */
-int
-CIFSSMBSetAttrLegacy(unsigned int xid, struct cifs_tcon *tcon, char *fileName,
- __u16 dos_attrs, const struct nls_table *nls_codepage)
-{
- SETATTR_REQ *pSMB = NULL;
- SETATTR_RSP *pSMBr = NULL;
- int rc = 0;
- int bytes_returned;
- int name_len;
-
- cifs_dbg(FYI, "In SetAttrLegacy\n");
-
-SetAttrLgcyRetry:
- rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB,
- (void **) &pSMBr);
- if (rc)
- return rc;
-
- if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
- name_len =
- ConvertToUTF16((__le16 *) pSMB->fileName, fileName,
- PATH_MAX, nls_codepage);
- name_len++; /* trailing null */
- name_len *= 2;
- } else {
- name_len = copy_path_name(pSMB->fileName, fileName);
- }
- pSMB->attr = cpu_to_le16(dos_attrs);
- pSMB->BufferFormat = 0x04;
- inc_rfc1001_len(pSMB, name_len + 1);
- pSMB->ByteCount = cpu_to_le16(name_len + 1);
- rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
- (struct smb_hdr *) pSMBr, &bytes_returned, 0);
- if (rc)
- cifs_dbg(FYI, "Error in LegacySetAttr = %d\n", rc);
-
- cifs_buf_release(pSMB);
-
- if (rc == -EAGAIN)
- goto SetAttrLgcyRetry;
-
- return rc;
-}
-#endif /* temporarily unneeded SetAttr legacy function */
-
static void
cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
const struct cifs_unix_set_info_args *args)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 24668eb006c6..495c395f9def 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -62,9 +62,7 @@
#include "dfs_cache.h"
#endif
#include "fs_context.h"
-#ifdef CONFIG_CIFS_SWN_UPCALL
#include "cifs_swn.h"
-#endif
extern mempool_t *cifs_req_poolp;
extern bool disable_legacy_dialects;
@@ -314,12 +312,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
mutex_lock(&server->srv_mutex);
-#ifdef CONFIG_CIFS_SWN_UPCALL
- if (server->use_swn_dstaddr) {
- server->dstaddr = server->swn_dstaddr;
- } else {
-#endif
+ if (!cifs_swn_set_server_dstaddr(server)) {
#ifdef CONFIG_CIFS_DFS_UPCALL
if (cifs_sb && cifs_sb->origin_fullpath)
/*
@@ -344,9 +338,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
#endif
-#ifdef CONFIG_CIFS_SWN_UPCALL
}
-#endif
if (cifs_rdma_enabled(server))
rc = smbd_reconnect(server);
@@ -363,9 +355,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
if (server->tcpStatus != CifsExiting)
server->tcpStatus = CifsNeedNegotiate;
spin_unlock(&GlobalMid_Lock);
-#ifdef CONFIG_CIFS_SWN_UPCALL
- server->use_swn_dstaddr = false;
-#endif
+ cifs_swn_reset_server_dstaddr(server);
mutex_unlock(&server->srv_mutex);
}
} while (server->tcpStatus == CifsNeedReconnect);
@@ -402,16 +392,6 @@ cifs_echo_request(struct work_struct *work)
int rc;
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, echo.work);
- unsigned long echo_interval;
-
- /*
- * If we need to renegotiate, set echo interval to zero to
- * immediately call echo service where we can renegotiate.
- */
- if (server->tcpStatus == CifsNeedNegotiate)
- echo_interval = 0;
- else
- echo_interval = server->echo_interval;
/*
* We cannot send an echo if it is disabled.
@@ -422,7 +402,7 @@ cifs_echo_request(struct work_struct *work)
server->tcpStatus == CifsExiting ||
server->tcpStatus == CifsNew ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
- time_before(jiffies, server->lstrp + echo_interval - HZ))
+ time_before(jiffies, server->lstrp + server->echo_interval - HZ))
goto requeue_echo;
rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
@@ -430,10 +410,8 @@ cifs_echo_request(struct work_struct *work)
cifs_dbg(FYI, "Unable to send echo request to server: %s\n",
server->hostname);
-#ifdef CONFIG_CIFS_SWN_UPCALL
/* Check witness registrations */
cifs_swn_check();
-#endif
requeue_echo:
queue_delayed_work(cifsiod_wq, &server->echo, server->echo_interval);
@@ -488,6 +466,7 @@ server_unresponsive(struct TCP_Server_Info *server)
*/
if ((server->tcpStatus == CifsGood ||
server->tcpStatus == CifsNeedNegotiate) &&
+ (!server->ops->can_echo || server->ops->can_echo(server)) &&
time_after(jiffies, server->lstrp + 3 * server->echo_interval)) {
cifs_server_dbg(VFS, "has not responded in %lu seconds. Reconnecting...\n",
(3 * server->echo_interval) / HZ);
@@ -1790,9 +1769,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
* for the request.
*/
if (is_domain && ses->domainName) {
- ctx->domainname = kstrndup(ses->domainName,
- strlen(ses->domainName),
- GFP_KERNEL);
+ ctx->domainname = kstrdup(ses->domainName, GFP_KERNEL);
if (!ctx->domainname) {
cifs_dbg(FYI, "Unable to allocate %zd bytes for domain\n",
len);
@@ -2009,7 +1986,6 @@ cifs_put_tcon(struct cifs_tcon *tcon)
return;
}
-#ifdef CONFIG_CIFS_SWN_UPCALL
if (tcon->use_witness) {
int rc;
@@ -2019,7 +1995,6 @@ cifs_put_tcon(struct cifs_tcon *tcon)
__func__, rc);
}
}
-#endif
list_del_init(&tcon->tcon_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -2181,9 +2156,9 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
}
tcon->use_resilient = true;
}
-#ifdef CONFIG_CIFS_SWN_UPCALL
+
tcon->use_witness = false;
- if (ctx->witness) {
+ if (IS_ENABLED(CONFIG_CIFS_SWN_UPCALL) && ctx->witness) {
if (ses->server->vals->protocol_id >= SMB30_PROT_ID) {
if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER) {
/*
@@ -2209,7 +2184,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
goto out_fail;
}
}
-#endif
/* If the user really knows what they are doing they can override */
if (tcon->share_flags & SMB2_SHAREFLAG_NO_CACHING) {
@@ -3175,17 +3149,29 @@ out:
int
cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname)
{
- int rc = 0;
+ int rc;
- smb3_parse_devname(devname, ctx);
+ if (devname) {
+ cifs_dbg(FYI, "%s: devname=%s\n", __func__, devname);
+ rc = smb3_parse_devname(devname, ctx);
+ if (rc) {
+ cifs_dbg(VFS, "%s: failed to parse %s: %d\n", __func__, devname, rc);
+ return rc;
+ }
+ }
if (mntopts) {
char *ip;
- cifs_dbg(FYI, "%s: mntopts=%s\n", __func__, mntopts);
rc = smb3_parse_opt(mntopts, "ip", &ip);
- if (!rc && !cifs_convert_address((struct sockaddr *)&ctx->dstaddr, ip,
- strlen(ip))) {
+ if (rc) {
+ cifs_dbg(VFS, "%s: failed to parse ip options: %d\n", __func__, rc);
+ return rc;
+ }
+
+ rc = cifs_convert_address((struct sockaddr *)&ctx->dstaddr, ip, strlen(ip));
+ kfree(ip);
+ if (!rc) {
cifs_dbg(VFS, "%s: failed to convert ip address\n", __func__);
return -EINVAL;
}
@@ -3205,7 +3191,7 @@ cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const c
return -EINVAL;
}
- return rc;
+ return 0;
}
static int
@@ -3426,8 +3412,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
goto error;
}
/* Save mount options */
- mntdata = kstrndup(cifs_sb->ctx->mount_options,
- strlen(cifs_sb->ctx->mount_options), GFP_KERNEL);
+ mntdata = kstrdup(cifs_sb->ctx->mount_options, GFP_KERNEL);
if (!mntdata) {
rc = -ENOMEM;
goto error;
@@ -3500,7 +3485,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
* links, the prefix path is included in both and may be changed during reconnect. See
* cifs_tree_connect().
*/
- cifs_sb->origin_fullpath = kstrndup(full_path, strlen(full_path), GFP_KERNEL);
+ cifs_sb->origin_fullpath = kstrdup(full_path, GFP_KERNEL);
if (!cifs_sb->origin_fullpath) {
rc = -ENOMEM;
goto error;
@@ -3877,9 +3862,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
ctx->sectype = master_tcon->ses->sectype;
ctx->sign = master_tcon->ses->sign;
ctx->seal = master_tcon->seal;
-#ifdef CONFIG_CIFS_SWN_UPCALL
ctx->witness = master_tcon->use_witness;
-#endif
rc = cifs_set_vol_auth(ctx, master_tcon->ses);
if (rc) {
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 098b4bc8da59..b1fa30fefe1f 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -81,23 +81,24 @@ static void refresh_cache_worker(struct work_struct *work);
static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker);
-static int get_normalized_path(const char *path, char **npath)
+static int get_normalized_path(const char *path, const char **npath)
{
if (!path || strlen(path) < 3 || (*path != '\\' && *path != '/'))
return -EINVAL;
if (*path == '\\') {
- *npath = (char *)path;
+ *npath = path;
} else {
- *npath = kstrndup(path, strlen(path), GFP_KERNEL);
- if (!*npath)
+ char *s = kstrdup(path, GFP_KERNEL);
+ if (!s)
return -ENOMEM;
- convert_delimiter(*npath, '\\');
+ convert_delimiter(s, '\\');
+ *npath = s;
}
return 0;
}
-static inline void free_normalized_path(const char *path, char *npath)
+static inline void free_normalized_path(const char *path, const char *npath)
{
if (path != npath)
kfree(npath);
@@ -358,7 +359,7 @@ static struct cache_dfs_tgt *alloc_target(const char *name, int path_consumed)
t = kmalloc(sizeof(*t), GFP_ATOMIC);
if (!t)
return ERR_PTR(-ENOMEM);
- t->name = kstrndup(name, strlen(name), GFP_ATOMIC);
+ t->name = kstrdup(name, GFP_ATOMIC);
if (!t->name) {
kfree(t);
return ERR_PTR(-ENOMEM);
@@ -419,7 +420,7 @@ static struct cache_entry *alloc_cache_entry(const char *path,
if (!ce)
return ERR_PTR(-ENOMEM);
- ce->path = kstrndup(path, strlen(path), GFP_KERNEL);
+ ce->path = kstrdup(path, GFP_KERNEL);
if (!ce->path) {
kmem_cache_free(cache_slab, ce);
return ERR_PTR(-ENOMEM);
@@ -531,7 +532,7 @@ static struct cache_entry *lookup_cache_entry(const char *path, unsigned int *ha
char *s, *e;
char sep;
- npath = kstrndup(path, strlen(path), GFP_KERNEL);
+ npath = kstrdup(path, GFP_KERNEL);
if (!npath)
return ERR_PTR(-ENOMEM);
@@ -641,7 +642,7 @@ static int __update_cache_entry(const char *path,
if (ce->tgthint) {
s = ce->tgthint->name;
- th = kstrndup(s, strlen(s), GFP_ATOMIC);
+ th = kstrdup(s, GFP_ATOMIC);
if (!th)
return -ENOMEM;
}
@@ -786,11 +787,11 @@ static int setup_referral(const char *path, struct cache_entry *ce,
memset(ref, 0, sizeof(*ref));
- ref->path_name = kstrndup(path, strlen(path), GFP_ATOMIC);
+ ref->path_name = kstrdup(path, GFP_ATOMIC);
if (!ref->path_name)
return -ENOMEM;
- ref->node_name = kstrndup(target, strlen(target), GFP_ATOMIC);
+ ref->node_name = kstrdup(target, GFP_ATOMIC);
if (!ref->node_name) {
rc = -ENOMEM;
goto err_free_path;
@@ -828,7 +829,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl)
goto err_free_it;
}
- it->it_name = kstrndup(t->name, strlen(t->name), GFP_ATOMIC);
+ it->it_name = kstrdup(t->name, GFP_ATOMIC);
if (!it->it_name) {
kfree(it);
rc = -ENOMEM;
@@ -882,7 +883,7 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses,
struct dfs_cache_tgt_list *tgt_list)
{
int rc;
- char *npath;
+ const char *npath;
struct cache_entry *ce;
rc = get_normalized_path(path, &npath);
@@ -936,7 +937,7 @@ int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref,
struct dfs_cache_tgt_list *tgt_list)
{
int rc;
- char *npath;
+ const char *npath;
struct cache_entry *ce;
rc = get_normalized_path(path, &npath);
@@ -991,7 +992,7 @@ int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses,
const struct dfs_cache_tgt_iterator *it)
{
int rc;
- char *npath;
+ const char *npath;
struct cache_entry *ce;
struct cache_dfs_tgt *t;
@@ -1053,7 +1054,7 @@ int dfs_cache_noreq_update_tgthint(const char *path,
const struct dfs_cache_tgt_iterator *it)
{
int rc;
- char *npath;
+ const char *npath;
struct cache_entry *ce;
struct cache_dfs_tgt *t;
@@ -1111,7 +1112,7 @@ int dfs_cache_get_tgt_referral(const char *path,
struct dfs_info3_param *ref)
{
int rc;
- char *npath;
+ const char *npath;
struct cache_entry *ce;
if (!it || !ref)
@@ -1166,7 +1167,7 @@ int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx, const char *fu
if (!vi)
return -ENOMEM;
- vi->fullpath = kstrndup(fullpath, strlen(fullpath), GFP_KERNEL);
+ vi->fullpath = kstrdup(fullpath, GFP_KERNEL);
if (!vi->fullpath) {
rc = -ENOMEM;
goto err_free_vi;
@@ -1484,7 +1485,7 @@ static int refresh_tcon(struct vol_info *vi, struct cifs_tcon *tcon)
{
int rc = 0;
unsigned int xid;
- char *path, *npath;
+ const char *path, *npath;
struct cache_entry *ce;
struct cifs_ses *root_ses = NULL, *ses;
struct dfs_info3_param *refs = NULL;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index a3fb81e0ba17..6bcd3e8f7cda 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -34,6 +34,7 @@
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
#include "fs_context.h"
+#include "cifs_ioctl.h"
static void
renew_parental_timestamps(struct dentry *direntry)
@@ -78,31 +79,31 @@ cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_s
}
/* Note: caller must free return buffer */
-char *
-build_path_from_dentry(struct dentry *direntry)
+const char *
+build_path_from_dentry(struct dentry *direntry, void *page)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
bool prefix = tcon->Flags & SMB_SHARE_IS_IN_DFS;
- return build_path_from_dentry_optional_prefix(direntry,
+ return build_path_from_dentry_optional_prefix(direntry, page,
prefix);
}
char *
-build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix)
+build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page,
+ bool prefix)
{
- struct dentry *temp;
- int namelen;
int dfsplen;
int pplen = 0;
- char *full_path;
- char dirsep;
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- unsigned seq;
+ char dirsep = CIFS_DIR_SEP(cifs_sb);
+ char *s;
+
+ if (unlikely(!page))
+ return ERR_PTR(-ENOMEM);
- dirsep = CIFS_DIR_SEP(cifs_sb);
if (prefix)
dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
else
@@ -111,86 +112,39 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix)
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0;
-cifs_bp_rename_retry:
- namelen = dfsplen + pplen;
- seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
- for (temp = direntry; !IS_ROOT(temp);) {
- namelen += (1 + temp->d_name.len);
- temp = temp->d_parent;
- if (temp == NULL) {
- cifs_dbg(VFS, "corrupt dentry\n");
- rcu_read_unlock();
- return NULL;
- }
- }
- rcu_read_unlock();
-
- full_path = kmalloc(namelen+1, GFP_ATOMIC);
- if (full_path == NULL)
- return full_path;
- full_path[namelen] = 0; /* trailing null */
- rcu_read_lock();
- for (temp = direntry; !IS_ROOT(temp);) {
- spin_lock(&temp->d_lock);
- namelen -= 1 + temp->d_name.len;
- if (namelen < 0) {
- spin_unlock(&temp->d_lock);
- break;
- } else {
- full_path[namelen] = dirsep;
- strncpy(full_path + namelen + 1, temp->d_name.name,
- temp->d_name.len);
- cifs_dbg(FYI, "name: %s\n", full_path + namelen);
- }
- spin_unlock(&temp->d_lock);
- temp = temp->d_parent;
- if (temp == NULL) {
- cifs_dbg(VFS, "corrupt dentry\n");
- rcu_read_unlock();
- kfree(full_path);
- return NULL;
- }
- }
- rcu_read_unlock();
- if (namelen != dfsplen + pplen || read_seqretry(&rename_lock, seq)) {
- cifs_dbg(FYI, "did not end path lookup where expected. namelen=%ddfsplen=%d\n",
- namelen, dfsplen);
- /* presumably this is only possible if racing with a rename
- of one of the parent directories (we can not lock the dentries
- above us to prevent this, but retrying should be harmless) */
- kfree(full_path);
- goto cifs_bp_rename_retry;
- }
- /* DIR_SEP already set for byte 0 / vs \ but not for
- subsequent slashes in prepath which currently must
- be entered the right way - not sure if there is an alternative
- since the '\' is a valid posix character so we can not switch
- those safely to '/' if any are found in the middle of the prepath */
- /* BB test paths to Windows with '/' in the midst of prepath */
-
+ s = dentry_path_raw(direntry, page, PAGE_SIZE);
+ if (IS_ERR(s))
+ return s;
+ if (!s[1]) // for root we want "", not "/"
+ s++;
+ if (s < (char *)page + pplen + dfsplen)
+ return ERR_PTR(-ENAMETOOLONG);
if (pplen) {
- int i;
-
cifs_dbg(FYI, "using cifs_sb prepath <%s>\n", cifs_sb->prepath);
- memcpy(full_path+dfsplen+1, cifs_sb->prepath, pplen-1);
- full_path[dfsplen] = dirsep;
- for (i = 0; i < pplen-1; i++)
- if (full_path[dfsplen+1+i] == '/')
- full_path[dfsplen+1+i] = CIFS_DIR_SEP(cifs_sb);
+ s -= pplen;
+ memcpy(s + 1, cifs_sb->prepath, pplen - 1);
+ *s = '/';
}
+ if (dirsep != '/') {
+ /* BB test paths to Windows with '/' in the midst of prepath */
+ char *p;
+ for (p = s; *p; p++)
+ if (*p == '/')
+ *p = dirsep;
+ }
if (dfsplen) {
- strncpy(full_path, tcon->treeName, dfsplen);
+ s -= dfsplen;
+ memcpy(s, tcon->treeName, dfsplen);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
int i;
for (i = 0; i < dfsplen; i++) {
- if (full_path[i] == '\\')
- full_path[i] = '/';
+ if (s[i] == '\\')
+ s[i] = '/';
}
}
}
- return full_path;
+ return s;
}
/*
@@ -233,7 +187,8 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
int desired_access;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = tlink_tcon(tlink);
- char *full_path = NULL;
+ const char *full_path;
+ void *page = alloc_dentry_path();
FILE_ALL_INFO *buf = NULL;
struct inode *newinode = NULL;
int disposition;
@@ -244,9 +199,11 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
if (tcon->ses->server->oplocks)
*oplock = REQ_OPLOCK;
- full_path = build_path_from_dentry(direntry);
- if (!full_path)
- return -ENOMEM;
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
+ free_dentry_path(page);
+ return PTR_ERR(full_path);
+ }
if (tcon->unix_ext && cap_unix(tcon->ses) && !tcon->broken_posix_open &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
@@ -418,15 +375,16 @@ cifs_create_get_file_info:
if (newinode) {
if (server->ops->set_lease_key)
server->ops->set_lease_key(newinode, fid);
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
- newinode->i_mode = mode;
- if ((*oplock & CIFS_CREATE_ACTION) &&
- (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
- newinode->i_uid = current_fsuid();
- if (inode->i_mode & S_ISGID)
- newinode->i_gid = inode->i_gid;
- else
- newinode->i_gid = current_fsgid();
+ if ((*oplock & CIFS_CREATE_ACTION) && S_ISREG(newinode->i_mode)) {
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
+ newinode->i_mode = mode;
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+ newinode->i_uid = current_fsuid();
+ if (inode->i_mode & S_ISGID)
+ newinode->i_gid = inode->i_gid;
+ else
+ newinode->i_gid = current_fsgid();
+ }
}
}
}
@@ -448,7 +406,7 @@ cifs_create_set_dentry:
out:
kfree(buf);
- kfree(full_path);
+ free_dentry_path(page);
return rc;
out_err:
@@ -473,6 +431,9 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
__u32 oplock;
struct cifsFileInfo *file_info;
+ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
+ return -EIO;
+
/*
* Posix open is only called (at lookup time) for file create now. For
* opens (rather than creates), because we do not know if it is a file
@@ -589,6 +550,9 @@ int cifs_create(struct user_namespace *mnt_userns, struct inode *inode,
cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
inode, direntry, direntry);
+ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
+ return -EIO;
+
tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
rc = PTR_ERR(tlink);
if (IS_ERR(tlink))
@@ -619,23 +583,27 @@ int cifs_mknod(struct user_namespace *mnt_userns, struct inode *inode,
struct cifs_sb_info *cifs_sb;
struct tcon_link *tlink;
struct cifs_tcon *tcon;
- char *full_path = NULL;
+ const char *full_path;
+ void *page;
if (!old_valid_dev(device_number))
return -EINVAL;
cifs_sb = CIFS_SB(inode->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
+ page = alloc_dentry_path();
tcon = tlink_tcon(tlink);
-
xid = get_xid();
- full_path = build_path_from_dentry(direntry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto mknod_out;
}
@@ -644,7 +612,7 @@ int cifs_mknod(struct user_namespace *mnt_userns, struct inode *inode,
device_number);
mknod_out:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
return rc;
@@ -660,7 +628,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
struct inode *newInode = NULL;
- char *full_path = NULL;
+ const char *full_path;
+ void *page;
xid = get_xid();
@@ -687,11 +656,13 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
/* can not grab the rename sem here since it would
deadlock in the cases (beginning of sys_rename itself)
in which we already have the sb rename sem */
- full_path = build_path_from_dentry(direntry);
- if (full_path == NULL) {
+ page = alloc_dentry_path();
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
cifs_put_tlink(tlink);
free_xid(xid);
- return ERR_PTR(-ENOMEM);
+ free_dentry_path(page);
+ return ERR_CAST(full_path);
}
if (d_really_is_positive(direntry)) {
@@ -727,7 +698,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
}
newInode = ERR_PTR(rc);
}
- kfree(full_path);
+ free_dentry_path(page);
cifs_put_tlink(tlink);
free_xid(xid);
return d_splice_alias(newInode, direntry);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 042e24aad410..379a427f3c2f 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -45,6 +45,7 @@
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
+#include "cifs_ioctl.h"
static inline int cifs_convert_flags(unsigned int flags)
{
@@ -112,7 +113,7 @@ static inline int cifs_get_disposition(unsigned int flags)
return FILE_OPEN;
}
-int cifs_posix_open(char *full_path, struct inode **pinode,
+int cifs_posix_open(const char *full_path, struct inode **pinode,
struct super_block *sb, int mode, unsigned int f_flags,
__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
@@ -166,7 +167,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
}
} else {
cifs_revalidate_mapping(*pinode);
- cifs_fattr_to_inode(*pinode, &fattr);
+ rc = cifs_fattr_to_inode(*pinode, &fattr);
}
posix_open_ret:
@@ -175,7 +176,7 @@ posix_open_ret:
}
static int
-cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
+cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
struct cifs_fid *fid, unsigned int xid)
{
@@ -322,9 +323,11 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->dentry = dget(dentry);
cfile->f_flags = file->f_flags;
cfile->invalidHandle = false;
+ cfile->deferred_close_scheduled = false;
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+ INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
mutex_init(&cfile->fh_mutex);
spin_lock_init(&cfile->file_info_lock);
@@ -530,7 +533,8 @@ int cifs_open(struct inode *inode, struct file *file)
struct cifs_tcon *tcon;
struct tcon_link *tlink;
struct cifsFileInfo *cfile = NULL;
- char *full_path = NULL;
+ void *page;
+ const char *full_path;
bool posix_open_ok = false;
struct cifs_fid fid;
struct cifs_pending_open open;
@@ -538,6 +542,11 @@ int cifs_open(struct inode *inode, struct file *file)
xid = get_xid();
cifs_sb = CIFS_SB(inode->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb))) {
+ free_xid(xid);
+ return -EIO;
+ }
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
free_xid(xid);
@@ -546,9 +555,10 @@ int cifs_open(struct inode *inode, struct file *file)
tcon = tlink_tcon(tlink);
server = tcon->ses->server;
- full_path = build_path_from_dentry(file_dentry(file));
- if (full_path == NULL) {
- rc = -ENOMEM;
+ page = alloc_dentry_path();
+ full_path = build_path_from_dentry(file_dentry(file), page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto out;
}
@@ -563,6 +573,20 @@ int cifs_open(struct inode *inode, struct file *file)
file->f_op = &cifs_file_direct_ops;
}
+ /* Get the cached handle as SMB2 close is deferred */
+ rc = cifs_get_readable_path(tcon, full_path, &cfile);
+ if (rc == 0) {
+ if (file->f_flags == cfile->f_flags) {
+ file->private_data = cfile;
+ spin_lock(&CIFS_I(inode)->deferred_lock);
+ cifs_del_deferred_close(cfile);
+ spin_unlock(&CIFS_I(inode)->deferred_lock);
+ goto out;
+ } else {
+ _cifsFileInfo_put(cfile, true, false);
+ }
+ }
+
if (server->oplocks)
oplock = REQ_OPLOCK;
else
@@ -640,7 +664,7 @@ int cifs_open(struct inode *inode, struct file *file)
}
out:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
return rc;
@@ -689,7 +713,8 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
struct TCP_Server_Info *server;
struct cifsInodeInfo *cinode;
struct inode *inode;
- char *full_path = NULL;
+ void *page;
+ const char *full_path;
int desired_access;
int disposition = FILE_OPEN;
int create_options = CREATE_NOT_DIR;
@@ -699,9 +724,8 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
mutex_lock(&cfile->fh_mutex);
if (!cfile->invalidHandle) {
mutex_unlock(&cfile->fh_mutex);
- rc = 0;
free_xid(xid);
- return rc;
+ return 0;
}
inode = d_inode(cfile->dentry);
@@ -715,12 +739,13 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
* called and if the server was down that means we end up here, and we
* can never tell if the caller already has the rename_sem.
*/
- full_path = build_path_from_dentry(cfile->dentry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ page = alloc_dentry_path();
+ full_path = build_path_from_dentry(cfile->dentry, page);
+ if (IS_ERR(full_path)) {
mutex_unlock(&cfile->fh_mutex);
+ free_dentry_path(page);
free_xid(xid);
- return rc;
+ return PTR_ERR(full_path);
}
cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
@@ -838,16 +863,64 @@ reopen_success:
cifs_relock_file(cfile);
reopen_error_exit:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
return rc;
}
+void smb2_deferred_work_close(struct work_struct *work)
+{
+ struct cifsFileInfo *cfile = container_of(work,
+ struct cifsFileInfo, deferred.work);
+
+ spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+ cifs_del_deferred_close(cfile);
+ cfile->deferred_close_scheduled = false;
+ spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+ _cifsFileInfo_put(cfile, true, false);
+}
+
int cifs_close(struct inode *inode, struct file *file)
{
+ struct cifsFileInfo *cfile;
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_deferred_close *dclose;
+
if (file->private_data != NULL) {
- _cifsFileInfo_put(file->private_data, true, false);
+ cfile = file->private_data;
file->private_data = NULL;
+ dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
+ if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
+ cinode->lease_granted &&
+ dclose) {
+ if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
+ inode->i_ctime = inode->i_mtime = current_time(inode);
+ spin_lock(&cinode->deferred_lock);
+ cifs_add_deferred_close(cfile, dclose);
+ if (cfile->deferred_close_scheduled &&
+ delayed_work_pending(&cfile->deferred)) {
+ /*
+ * If there is no pending work, mod_delayed_work queues new work.
+ * So, Increase the ref count to avoid use-after-free.
+ */
+ if (!mod_delayed_work(deferredclose_wq,
+ &cfile->deferred, cifs_sb->ctx->acregmax))
+ cifsFileInfo_get(cfile);
+ } else {
+ /* Deferred close for files */
+ queue_delayed_work(deferredclose_wq,
+ &cfile->deferred, cifs_sb->ctx->acregmax);
+ cfile->deferred_close_scheduled = true;
+ spin_unlock(&cinode->deferred_lock);
+ return 0;
+ }
+ spin_unlock(&cinode->deferred_lock);
+ _cifsFileInfo_put(cfile, true, false);
+ } else {
+ _cifsFileInfo_put(cfile, true, false);
+ kfree(dclose);
+ }
}
/* return code from the ->release op is always ignored */
@@ -1917,8 +1990,10 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
if (total_written > 0) {
spin_lock(&d_inode(dentry)->i_lock);
- if (*offset > d_inode(dentry)->i_size)
+ if (*offset > d_inode(dentry)->i_size) {
i_size_write(d_inode(dentry), *offset);
+ d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
+ }
spin_unlock(&d_inode(dentry)->i_lock);
}
mark_inode_dirty_sync(d_inode(dentry));
@@ -1944,7 +2019,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
continue;
if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
- if (!open_file->invalidHandle) {
+ if ((!open_file->invalidHandle)) {
/* found a good file */
/* lock it so it will not be closed on us */
cifsFileInfo_get(open_file);
@@ -2069,34 +2144,31 @@ cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
int flags,
struct cifsFileInfo **ret_file)
{
- struct list_head *tmp;
struct cifsFileInfo *cfile;
- struct cifsInodeInfo *cinode;
- char *full_path;
+ void *page = alloc_dentry_path();
*ret_file = NULL;
spin_lock(&tcon->open_file_lock);
- list_for_each(tmp, &tcon->openFileList) {
- cfile = list_entry(tmp, struct cifsFileInfo,
- tlist);
- full_path = build_path_from_dentry(cfile->dentry);
- if (full_path == NULL) {
+ list_for_each_entry(cfile, &tcon->openFileList, tlist) {
+ struct cifsInodeInfo *cinode;
+ const char *full_path = build_path_from_dentry(cfile->dentry, page);
+ if (IS_ERR(full_path)) {
spin_unlock(&tcon->open_file_lock);
- return -ENOMEM;
+ free_dentry_path(page);
+ return PTR_ERR(full_path);
}
- if (strcmp(full_path, name)) {
- kfree(full_path);
+ if (strcmp(full_path, name))
continue;
- }
- kfree(full_path);
cinode = CIFS_I(d_inode(cfile->dentry));
spin_unlock(&tcon->open_file_lock);
+ free_dentry_path(page);
return cifs_get_writable_file(cinode, flags, ret_file);
}
spin_unlock(&tcon->open_file_lock);
+ free_dentry_path(page);
return -ENOENT;
}
@@ -2104,35 +2176,32 @@ int
cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
struct cifsFileInfo **ret_file)
{
- struct list_head *tmp;
struct cifsFileInfo *cfile;
- struct cifsInodeInfo *cinode;
- char *full_path;
+ void *page = alloc_dentry_path();
*ret_file = NULL;
spin_lock(&tcon->open_file_lock);
- list_for_each(tmp, &tcon->openFileList) {
- cfile = list_entry(tmp, struct cifsFileInfo,
- tlist);
- full_path = build_path_from_dentry(cfile->dentry);
- if (full_path == NULL) {
+ list_for_each_entry(cfile, &tcon->openFileList, tlist) {
+ struct cifsInodeInfo *cinode;
+ const char *full_path = build_path_from_dentry(cfile->dentry, page);
+ if (IS_ERR(full_path)) {
spin_unlock(&tcon->open_file_lock);
- return -ENOMEM;
+ free_dentry_path(page);
+ return PTR_ERR(full_path);
}
- if (strcmp(full_path, name)) {
- kfree(full_path);
+ if (strcmp(full_path, name))
continue;
- }
- kfree(full_path);
cinode = CIFS_I(d_inode(cfile->dentry));
spin_unlock(&tcon->open_file_lock);
+ free_dentry_path(page);
*ret_file = find_readable_file(cinode, 0);
return *ret_file ? 0 : -ENOENT;
}
spin_unlock(&tcon->open_file_lock);
+ free_dentry_path(page);
return -ENOENT;
}
@@ -2479,6 +2548,8 @@ retry:
if (cfile)
cifsFileInfo_put(cfile);
free_xid(xid);
+ /* Indication to update ctime and mtime as close is deferred */
+ set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
return rc;
}
@@ -2580,13 +2651,17 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
if (rc > 0) {
spin_lock(&inode->i_lock);
- if (pos > inode->i_size)
+ if (pos > inode->i_size) {
i_size_write(inode, pos);
+ inode->i_blocks = (512 - 1 + pos) >> 9;
+ }
spin_unlock(&inode->i_lock);
}
unlock_page(page);
put_page(page);
+ /* Indication to update ctime and mtime as close is deferred */
+ set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
return rc;
}
@@ -4747,6 +4822,8 @@ void cifs_oplock_break(struct work_struct *work)
struct TCP_Server_Info *server = tcon->ses->server;
int rc = 0;
bool purge_cache = false;
+ bool is_deferred = false;
+ struct cifs_deferred_close *dclose;
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
@@ -4793,6 +4870,24 @@ oplock_break_ack:
cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
}
+ /*
+ * When oplock break is received and there are no active
+ * file handles but cached, then schedule deferred close immediately.
+ * So, new open will not use cached handle.
+ */
+ spin_lock(&CIFS_I(inode)->deferred_lock);
+ is_deferred = cifs_is_deferred_close(cfile, &dclose);
+ if (is_deferred &&
+ cfile->deferred_close_scheduled &&
+ delayed_work_pending(&cfile->deferred)) {
+ /*
+ * If there is no pending work, mod_delayed_work queues new work.
+ * So, Increase the ref count to avoid use-after-free.
+ */
+ if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
+ cifsFileInfo_get(cfile);
+ }
+ spin_unlock(&CIFS_I(inode)->deferred_lock);
_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
cifs_done_oplock_break(cinode);
}
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 78889024a7ed..92d4ab029c91 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -137,6 +137,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_u32("min_enc_offload", Opt_min_enc_offload),
fsparam_u32("esize", Opt_min_enc_offload),
fsparam_u32("bsize", Opt_blocksize),
+ fsparam_u32("rasize", Opt_rasize),
fsparam_u32("rsize", Opt_rsize),
fsparam_u32("wsize", Opt_wsize),
fsparam_u32("actimeo", Opt_actimeo),
@@ -188,8 +189,8 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
{}
};
-int
-cifs_parse_security_flavors(char *value, struct smb3_fs_context *ctx)
+static int
+cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_context *ctx)
{
substring_t args[MAX_OPT_ARGS];
@@ -203,7 +204,7 @@ cifs_parse_security_flavors(char *value, struct smb3_fs_context *ctx)
switch (match_token(value, cifs_secflavor_tokens, args)) {
case Opt_sec_krb5p:
- cifs_dbg(VFS, "sec=krb5p is not supported!\n");
+ cifs_errorf(fc, "sec=krb5p is not supported!\n");
return 1;
case Opt_sec_krb5i:
ctx->sign = true;
@@ -238,7 +239,7 @@ cifs_parse_security_flavors(char *value, struct smb3_fs_context *ctx)
ctx->nullauth = 1;
break;
default:
- cifs_dbg(VFS, "bad security option: %s\n", value);
+ cifs_errorf(fc, "bad security option: %s\n", value);
return 1;
}
@@ -254,8 +255,8 @@ static const match_table_t cifs_cacheflavor_tokens = {
{ Opt_cache_err, NULL }
};
-int
-cifs_parse_cache_flavor(char *value, struct smb3_fs_context *ctx)
+static int
+cifs_parse_cache_flavor(struct fs_context *fc, char *value, struct smb3_fs_context *ctx)
{
substring_t args[MAX_OPT_ARGS];
@@ -291,7 +292,7 @@ cifs_parse_cache_flavor(char *value, struct smb3_fs_context *ctx)
ctx->cache_rw = true;
break;
default:
- cifs_dbg(VFS, "bad cache= option: %s\n", value);
+ cifs_errorf(fc, "bad cache= option: %s\n", value);
return 1;
}
return 0;
@@ -339,7 +340,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
}
static int
-cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
+cifs_parse_smb_version(struct fs_context *fc, char *value, struct smb3_fs_context *ctx, bool is_smb3)
{
substring_t args[MAX_OPT_ARGS];
@@ -347,24 +348,24 @@ cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
case Smb_1:
if (disable_legacy_dialects) {
- cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+ cifs_errorf(fc, "mount with legacy dialect disabled\n");
return 1;
}
if (is_smb3) {
- cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n");
+ cifs_errorf(fc, "vers=1.0 (cifs) not permitted when mounting with smb3\n");
return 1;
}
- cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n");
+ cifs_errorf(fc, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n");
ctx->ops = &smb1_operations;
ctx->vals = &smb1_values;
break;
case Smb_20:
if (disable_legacy_dialects) {
- cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+ cifs_errorf(fc, "mount with legacy dialect disabled\n");
return 1;
}
if (is_smb3) {
- cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n");
+ cifs_errorf(fc, "vers=2.0 not permitted when mounting with smb3\n");
return 1;
}
ctx->ops = &smb20_operations;
@@ -372,10 +373,10 @@ cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
break;
#else
case Smb_1:
- cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
+ cifs_errorf(fc, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
return 1;
case Smb_20:
- cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
+ cifs_errorf(fc, "vers=2.0 mount not permitted when legacy dialects disabled\n");
return 1;
#endif /* CIFS_ALLOW_INSECURE_LEGACY */
case Smb_21:
@@ -403,7 +404,7 @@ cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
ctx->vals = &smbdefault_values;
break;
default:
- cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
+ cifs_errorf(fc, "Unknown vers= option specified: %s\n", value);
return 1;
}
return 0;
@@ -430,7 +431,7 @@ int smb3_parse_opt(const char *options, const char *key, char **val)
if (nval == p)
continue;
*nval++ = 0;
- *val = kstrndup(nval, strlen(nval), GFP_KERNEL);
+ *val = kstrdup(nval, GFP_KERNEL);
rc = !*val ? -ENOMEM : 0;
goto out;
}
@@ -475,6 +476,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
/* move "pos" up to delimiter or NULL */
pos += len;
+ kfree(ctx->UNC);
ctx->UNC = kstrndup(devname, pos - devname, GFP_KERNEL);
if (!ctx->UNC)
return -ENOMEM;
@@ -485,6 +487,9 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
if (*pos == '/' || *pos == '\\')
pos++;
+ kfree(ctx->prepath);
+ ctx->prepath = NULL;
+
/* If pos is NULL then no prepath */
if (!*pos)
return 0;
@@ -588,14 +593,14 @@ static int smb3_fs_context_validate(struct fs_context *fc)
struct smb3_fs_context *ctx = smb3_fc2context(fc);
if (ctx->rdma && ctx->vals->protocol_id < SMB30_PROT_ID) {
- cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n");
+ cifs_errorf(fc, "SMB Direct requires Version >=3.0\n");
return -EOPNOTSUPP;
}
#ifndef CONFIG_KEYS
/* Muliuser mounts require CONFIG_KEYS support */
if (ctx->multiuser) {
- cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n");
+ cifs_errorf(fc, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n");
return -1;
}
#endif
@@ -605,13 +610,13 @@ static int smb3_fs_context_validate(struct fs_context *fc)
if (!ctx->UNC) {
- cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
+ cifs_errorf(fc, "CIFS mount error: No usable UNC path provided in device string!\n");
return -1;
}
/* make sure UNC has a share name */
if (strlen(ctx->UNC) < 3 || !strchr(ctx->UNC + 3, '\\')) {
- cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n");
+ cifs_errorf(fc, "Malformed UNC. Unable to find share name.\n");
return -ENOENT;
}
@@ -684,49 +689,50 @@ static void smb3_fs_context_free(struct fs_context *fc)
* Compare the old and new proposed context during reconfigure
* and check if the changes are compatible.
*/
-static int smb3_verify_reconfigure_ctx(struct smb3_fs_context *new_ctx,
+static int smb3_verify_reconfigure_ctx(struct fs_context *fc,
+ struct smb3_fs_context *new_ctx,
struct smb3_fs_context *old_ctx)
{
if (new_ctx->posix_paths != old_ctx->posix_paths) {
- cifs_dbg(VFS, "can not change posixpaths during remount\n");
+ cifs_errorf(fc, "can not change posixpaths during remount\n");
return -EINVAL;
}
if (new_ctx->sectype != old_ctx->sectype) {
- cifs_dbg(VFS, "can not change sec during remount\n");
+ cifs_errorf(fc, "can not change sec during remount\n");
return -EINVAL;
}
if (new_ctx->multiuser != old_ctx->multiuser) {
- cifs_dbg(VFS, "can not change multiuser during remount\n");
+ cifs_errorf(fc, "can not change multiuser during remount\n");
return -EINVAL;
}
if (new_ctx->UNC &&
(!old_ctx->UNC || strcmp(new_ctx->UNC, old_ctx->UNC))) {
- cifs_dbg(VFS, "can not change UNC during remount\n");
+ cifs_errorf(fc, "can not change UNC during remount\n");
return -EINVAL;
}
if (new_ctx->username &&
(!old_ctx->username || strcmp(new_ctx->username, old_ctx->username))) {
- cifs_dbg(VFS, "can not change username during remount\n");
+ cifs_errorf(fc, "can not change username during remount\n");
return -EINVAL;
}
if (new_ctx->password &&
(!old_ctx->password || strcmp(new_ctx->password, old_ctx->password))) {
- cifs_dbg(VFS, "can not change password during remount\n");
+ cifs_errorf(fc, "can not change password during remount\n");
return -EINVAL;
}
if (new_ctx->domainname &&
(!old_ctx->domainname || strcmp(new_ctx->domainname, old_ctx->domainname))) {
- cifs_dbg(VFS, "can not change domainname during remount\n");
+ cifs_errorf(fc, "can not change domainname during remount\n");
return -EINVAL;
}
if (new_ctx->nodename &&
(!old_ctx->nodename || strcmp(new_ctx->nodename, old_ctx->nodename))) {
- cifs_dbg(VFS, "can not change nodename during remount\n");
+ cifs_errorf(fc, "can not change nodename during remount\n");
return -EINVAL;
}
if (new_ctx->iocharset &&
(!old_ctx->iocharset || strcmp(new_ctx->iocharset, old_ctx->iocharset))) {
- cifs_dbg(VFS, "can not change iocharset during remount\n");
+ cifs_errorf(fc, "can not change iocharset during remount\n");
return -EINVAL;
}
@@ -747,7 +753,7 @@ static int smb3_reconfigure(struct fs_context *fc)
struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
int rc;
- rc = smb3_verify_reconfigure_ctx(ctx, cifs_sb->ctx);
+ rc = smb3_verify_reconfigure_ctx(fc, ctx, cifs_sb->ctx);
if (rc)
return rc;
@@ -933,13 +939,33 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
*/
if ((result.uint_32 < CIFS_MAX_MSGSIZE) ||
(result.uint_32 > (4 * SMB3_DEFAULT_IOSIZE))) {
- cifs_dbg(VFS, "%s: Invalid blocksize\n",
+ cifs_errorf(fc, "%s: Invalid blocksize\n",
__func__);
goto cifs_parse_mount_err;
}
ctx->bsize = result.uint_32;
ctx->got_bsize = true;
break;
+ case Opt_rasize:
+ /*
+ * readahead size realistically should never need to be
+ * less than 1M (CIFS_DEFAULT_IOSIZE) or greater than 32M
+ * (perhaps an exception should be considered in the
+ * for the case of a large number of channels
+ * when multichannel is negotiated) since that would lead
+ * to plenty of parallel I/O in flight to the server.
+ * Note that smaller read ahead sizes would
+ * hurt performance of common tools like cp and scp
+ * which often trigger sequential i/o with read ahead
+ */
+ if ((result.uint_32 > (8 * SMB3_DEFAULT_IOSIZE)) ||
+ (result.uint_32 < CIFS_DEFAULT_IOSIZE)) {
+ cifs_errorf(fc, "%s: Invalid rasize %d vs. %d\n",
+ __func__, result.uint_32, SMB3_DEFAULT_IOSIZE);
+ goto cifs_parse_mount_err;
+ }
+ ctx->rasize = result.uint_32;
+ break;
case Opt_rsize:
ctx->rsize = result.uint_32;
ctx->got_rsize = true;
@@ -951,25 +977,25 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
case Opt_acregmax:
ctx->acregmax = HZ * result.uint_32;
if (ctx->acregmax > CIFS_MAX_ACTIMEO) {
- cifs_dbg(VFS, "acregmax too large\n");
+ cifs_errorf(fc, "acregmax too large\n");
goto cifs_parse_mount_err;
}
break;
case Opt_acdirmax:
ctx->acdirmax = HZ * result.uint_32;
if (ctx->acdirmax > CIFS_MAX_ACTIMEO) {
- cifs_dbg(VFS, "acdirmax too large\n");
+ cifs_errorf(fc, "acdirmax too large\n");
goto cifs_parse_mount_err;
}
break;
case Opt_actimeo:
if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) {
- cifs_dbg(VFS, "timeout too large\n");
+ cifs_errorf(fc, "timeout too large\n");
goto cifs_parse_mount_err;
}
if ((ctx->acdirmax != CIFS_DEF_ACTIMEO) ||
(ctx->acregmax != CIFS_DEF_ACTIMEO)) {
- cifs_dbg(VFS, "actimeo ignored since acregmax or acdirmax specified\n");
+ cifs_errorf(fc, "actimeo ignored since acregmax or acdirmax specified\n");
break;
}
ctx->acdirmax = ctx->acregmax = HZ * result.uint_32;
@@ -982,7 +1008,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_max_credits:
if (result.uint_32 < 20 || result.uint_32 > 60000) {
- cifs_dbg(VFS, "%s: Invalid max_credits value\n",
+ cifs_errorf(fc, "%s: Invalid max_credits value\n",
__func__);
goto cifs_parse_mount_err;
}
@@ -990,16 +1016,19 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_max_channels:
if (result.uint_32 < 1 || result.uint_32 > CIFS_MAX_CHANNELS) {
- cifs_dbg(VFS, "%s: Invalid max_channels value, needs to be 1-%d\n",
+ cifs_errorf(fc, "%s: Invalid max_channels value, needs to be 1-%d\n",
__func__, CIFS_MAX_CHANNELS);
goto cifs_parse_mount_err;
}
ctx->max_channels = result.uint_32;
+ /* If more than one channel requested ... they want multichan */
+ if (result.uint_32 > 1)
+ ctx->multichannel = true;
break;
case Opt_handletimeout:
ctx->handle_timeout = result.uint_32;
if (ctx->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) {
- cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n");
+ cifs_errorf(fc, "Invalid handle cache timeout, longer than 16 minutes\n");
goto cifs_parse_mount_err;
}
break;
@@ -1010,23 +1039,23 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
case 0:
break;
case -ENOMEM:
- cifs_dbg(VFS, "Unable to allocate memory for devname\n");
+ cifs_errorf(fc, "Unable to allocate memory for devname\n");
goto cifs_parse_mount_err;
case -EINVAL:
- cifs_dbg(VFS, "Malformed UNC in devname\n");
+ cifs_errorf(fc, "Malformed UNC in devname\n");
goto cifs_parse_mount_err;
default:
- cifs_dbg(VFS, "Unknown error parsing devname\n");
+ cifs_errorf(fc, "Unknown error parsing devname\n");
goto cifs_parse_mount_err;
}
ctx->source = kstrdup(param->string, GFP_KERNEL);
if (ctx->source == NULL) {
- cifs_dbg(VFS, "OOM when copying UNC string\n");
+ cifs_errorf(fc, "OOM when copying UNC string\n");
goto cifs_parse_mount_err;
}
fc->source = kstrdup(param->string, GFP_KERNEL);
if (fc->source == NULL) {
- cifs_dbg(VFS, "OOM when copying UNC string\n");
+ cifs_errorf(fc, "OOM when copying UNC string\n");
goto cifs_parse_mount_err;
}
break;
@@ -1046,7 +1075,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
}
ctx->username = kstrdup(param->string, GFP_KERNEL);
if (ctx->username == NULL) {
- cifs_dbg(VFS, "OOM when copying username string\n");
+ cifs_errorf(fc, "OOM when copying username string\n");
goto cifs_parse_mount_err;
}
break;
@@ -1058,7 +1087,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->password = kstrdup(param->string, GFP_KERNEL);
if (ctx->password == NULL) {
- cifs_dbg(VFS, "OOM when copying password string\n");
+ cifs_errorf(fc, "OOM when copying password string\n");
goto cifs_parse_mount_err;
}
break;
@@ -1085,7 +1114,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
kfree(ctx->domainname);
ctx->domainname = kstrdup(param->string, GFP_KERNEL);
if (ctx->domainname == NULL) {
- cifs_dbg(VFS, "OOM when copying domainname string\n");
+ cifs_errorf(fc, "OOM when copying domainname string\n");
goto cifs_parse_mount_err;
}
cifs_dbg(FYI, "Domain name set\n");
@@ -1109,14 +1138,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
kfree(ctx->iocharset);
ctx->iocharset = kstrdup(param->string, GFP_KERNEL);
if (ctx->iocharset == NULL) {
- cifs_dbg(VFS, "OOM when copying iocharset string\n");
+ cifs_errorf(fc, "OOM when copying iocharset string\n");
goto cifs_parse_mount_err;
}
}
/* if iocharset not set then load_nls_default
* is used by caller
*/
- cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
+ cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
break;
case Opt_netbiosname:
memset(ctx->source_rfc1001_name, 0x20,
@@ -1175,21 +1204,21 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
goto cifs_parse_mount_err;
case Opt_vers:
/* protocol version (dialect) */
- if (cifs_parse_smb_version(param->string, ctx, is_smb3) != 0)
+ if (cifs_parse_smb_version(fc, param->string, ctx, is_smb3) != 0)
goto cifs_parse_mount_err;
ctx->got_version = true;
break;
case Opt_sec:
- if (cifs_parse_security_flavors(param->string, ctx) != 0)
+ if (cifs_parse_security_flavors(fc, param->string, ctx) != 0)
goto cifs_parse_mount_err;
break;
case Opt_cache:
- if (cifs_parse_cache_flavor(param->string, ctx) != 0)
+ if (cifs_parse_cache_flavor(fc, param->string, ctx) != 0)
goto cifs_parse_mount_err;
break;
case Opt_witness:
#ifndef CONFIG_CIFS_SWN_UPCALL
- cifs_dbg(VFS, "Witness support needs CONFIG_CIFS_SWN_UPCALL config option\n");
+ cifs_errorf(fc, "Witness support needs CONFIG_CIFS_SWN_UPCALL config option\n");
goto cifs_parse_mount_err;
#endif
ctx->witness = true;
@@ -1290,7 +1319,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_fsc:
#ifndef CONFIG_CIFS_FSCACHE
- cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n");
+ cifs_errorf(fc, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n");
goto cifs_parse_mount_err;
#endif
ctx->fsc = true;
@@ -1311,15 +1340,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
if (result.negated) {
ctx->nopersistent = true;
if (ctx->persistent) {
- cifs_dbg(VFS,
- "persistenthandles mount options conflict\n");
+ cifs_errorf(fc, "persistenthandles mount options conflict\n");
goto cifs_parse_mount_err;
}
} else {
ctx->persistent = true;
if ((ctx->nopersistent) || (ctx->resilient)) {
- cifs_dbg(VFS,
- "persistenthandles mount options conflict\n");
+ cifs_errorf(fc, "persistenthandles mount options conflict\n");
goto cifs_parse_mount_err;
}
}
@@ -1330,8 +1357,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
} else {
ctx->resilient = true;
if (ctx->persistent) {
- cifs_dbg(VFS,
- "persistenthandles mount options conflict\n");
+ cifs_errorf(fc, "persistenthandles mount options conflict\n");
goto cifs_parse_mount_err;
}
}
@@ -1379,7 +1405,9 @@ int smb3_init_fs_context(struct fs_context *fc)
ctx->cred_uid = current_uid();
ctx->linux_uid = current_uid();
ctx->linux_gid = current_gid();
- ctx->bsize = 1024 * 1024; /* can improve cp performance significantly */
+ /* By default 4MB read ahead size, 1MB block size */
+ ctx->bsize = CIFS_DEFAULT_IOSIZE; /* can improve cp performance significantly */
+ ctx->rasize = 0; /* 0 = use default (ie negotiated rsize) for read ahead pages */
/*
* default to SFM style remapping of seven reserved characters
@@ -1621,6 +1649,7 @@ void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb)
cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n");
}
}
+ cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SHUTDOWN;
return;
}
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index 87dd1f7168f2..2a71c8e411ac 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -13,7 +13,12 @@
#include <linux/parser.h>
#include <linux/fs_parser.h>
-#define cifs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__)
+/* Log errors in fs_context (new mount api) but also in dmesg (old style) */
+#define cifs_errorf(fc, fmt, ...) \
+ do { \
+ errorf(fc, fmt, ## __VA_ARGS__); \
+ cifs_dbg(VFS, fmt, ## __VA_ARGS__); \
+ } while (0)
enum smb_version {
Smb_1 = 1,
@@ -115,6 +120,7 @@ enum cifs_param {
Opt_dirmode,
Opt_min_enc_offload,
Opt_blocksize,
+ Opt_rasize,
Opt_rsize,
Opt_wsize,
Opt_actimeo,
@@ -230,6 +236,7 @@ struct smb3_fs_context {
/* reuse existing guid for multichannel */
u8 client_guid[SMB2_CLIENT_GUID_SIZE];
unsigned int bsize;
+ unsigned int rasize;
unsigned int rsize;
unsigned int wsize;
unsigned int min_offload;
@@ -257,10 +264,6 @@ struct smb3_fs_context {
extern const struct fs_parameter_spec smb3_fs_parameters[];
-extern int cifs_parse_cache_flavor(char *value,
- struct smb3_fs_context *ctx);
-extern int cifs_parse_security_flavors(char *value,
- struct smb3_fs_context *ctx);
extern int smb3_init_fs_context(struct fs_context *fc);
extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx);
extern void smb3_cleanup_fs_context(struct smb3_fs_context *ctx);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f2df4422e54a..1dfa57982522 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -26,7 +26,6 @@
#include <linux/sched/signal.h>
#include <linux/wait_bit.h>
#include <linux/fiemap.h>
-
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
@@ -38,7 +37,7 @@
#include "cifs_unicode.h"
#include "fscache.h"
#include "fs_context.h"
-
+#include "cifs_ioctl.h"
static void cifs_set_ops(struct inode *inode)
{
@@ -157,12 +156,18 @@ cifs_nlink_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
}
/* populate an inode with info from a cifs_fattr struct */
-void
+int
cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
{
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ if (!(inode->i_state & I_NEW) &&
+ unlikely(inode_wrong_type(inode, fattr->cf_mode))) {
+ CIFS_I(inode)->time = 0; /* force reval */
+ return -ESTALE;
+ }
+
cifs_revalidate_cache(inode, fattr);
spin_lock(&inode->i_lock);
@@ -219,6 +224,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
inode->i_flags |= S_AUTOMOUNT;
if (inode->i_state & I_NEW)
cifs_set_ops(inode);
+ return 0;
}
void
@@ -363,7 +369,7 @@ cifs_get_file_info_unix(struct file *filp)
rc = 0;
}
- cifs_fattr_to_inode(inode, &fattr);
+ rc = cifs_fattr_to_inode(inode, &fattr);
free_xid(xid);
return rc;
}
@@ -426,14 +432,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
}
/* if filetype is different, return error */
- if (unlikely(((*pinode)->i_mode & S_IFMT) !=
- (fattr.cf_mode & S_IFMT))) {
- CIFS_I(*pinode)->time = 0; /* force reval */
- rc = -ESTALE;
- goto cgiiu_exit;
- }
-
- cifs_fattr_to_inode(*pinode, &fattr);
+ rc = cifs_fattr_to_inode(*pinode, &fattr);
}
cgiiu_exit:
@@ -783,7 +782,8 @@ cifs_get_file_info(struct file *filp)
*/
fattr.cf_uniqueid = CIFS_I(inode)->uniqueid;
fattr.cf_flags |= CIFS_FATTR_NEED_REVAL;
- cifs_fattr_to_inode(inode, &fattr);
+ /* if filetype is different, return error */
+ rc = cifs_fattr_to_inode(inode, &fattr);
cgfi_exit:
free_xid(xid);
return rc;
@@ -1100,16 +1100,8 @@ handle_mnt_opt:
rc = -ESTALE;
goto out;
}
-
/* if filetype is different, return error */
- if (unlikely(((*inode)->i_mode & S_IFMT) !=
- (fattr.cf_mode & S_IFMT))) {
- CIFS_I(*inode)->time = 0; /* force reval */
- rc = -ESTALE;
- goto out;
- }
-
- cifs_fattr_to_inode(*inode, &fattr);
+ rc = cifs_fattr_to_inode(*inode, &fattr);
}
out:
cifs_buf_release(smb1_backup_rsp_buf);
@@ -1215,14 +1207,7 @@ smb311_posix_get_inode_info(struct inode **inode,
}
/* if filetype is different, return error */
- if (unlikely(((*inode)->i_mode & S_IFMT) !=
- (fattr.cf_mode & S_IFMT))) {
- CIFS_I(*inode)->time = 0; /* force reval */
- rc = -ESTALE;
- goto out;
- }
-
- cifs_fattr_to_inode(*inode, &fattr);
+ rc = cifs_fattr_to_inode(*inode, &fattr);
}
out:
cifs_put_tlink(tlink);
@@ -1249,7 +1234,7 @@ cifs_find_inode(struct inode *inode, void *opaque)
return 0;
/* don't match inode of different type */
- if ((inode->i_mode & S_IFMT) != (fattr->cf_mode & S_IFMT))
+ if (inode_wrong_type(inode, fattr->cf_mode))
return 0;
/* if it's not a directory or has no dentries, then flag it */
@@ -1317,6 +1302,7 @@ retry_iget5_locked:
}
}
+ /* can't fail - see cifs_find_inode() */
cifs_fattr_to_inode(inode, fattr);
if (sb->s_flags & SB_NOATIME)
inode->i_flags |= S_NOATIME | S_NOCMTIME;
@@ -1408,7 +1394,7 @@ out:
int
cifs_set_file_info(struct inode *inode, struct iattr *attrs, unsigned int xid,
- char *full_path, __u32 dosattr)
+ const char *full_path, __u32 dosattr)
{
bool set_time = false;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -1609,7 +1595,8 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
{
int rc = 0;
unsigned int xid;
- char *full_path = NULL;
+ const char *full_path;
+ void *page;
struct inode *inode = d_inode(dentry);
struct cifsInodeInfo *cifs_inode;
struct super_block *sb = dir->i_sb;
@@ -1622,6 +1609,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry);
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
@@ -1629,6 +1619,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
server = tcon->ses->server;
xid = get_xid();
+ page = alloc_dentry_path();
if (tcon->nodelete) {
rc = -EACCES;
@@ -1637,12 +1628,13 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
/* Unlink can be called from rename so we can not take the
* sb->s_vfs_rename_mutex here */
- full_path = build_path_from_dentry(dentry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(dentry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto unlink_out;
}
+ cifs_close_all_deferred_files(tcon);
if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = CIFSPOSIXDelFile(xid, tcon, full_path,
@@ -1713,7 +1705,7 @@ out_reval:
cifs_inode = CIFS_I(dir);
CIFS_I(dir)->time = 0; /* force revalidate of dir as well */
unlink_out:
- kfree(full_path);
+ free_dentry_path(page);
kfree(attrs);
free_xid(xid);
cifs_put_tlink(tlink);
@@ -1740,6 +1732,16 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
if (rc)
return rc;
+ if (!S_ISDIR(inode->i_mode)) {
+ /*
+ * mkdir succeeded, but another client has managed to remove the
+ * sucker and replace it with non-directory. Return success,
+ * but don't leave the child in dcache.
+ */
+ iput(inode);
+ d_drop(dentry);
+ return 0;
+ }
/*
* setting nlink not necessary except in cases where we failed to get it
* from the server or was set bogus. Also, since this is a brand new
@@ -1791,7 +1793,7 @@ cifs_mkdir_qinfo(struct inode *parent, struct dentry *dentry, umode_t mode,
}
}
d_instantiate(dentry, inode);
- return rc;
+ return 0;
}
static int
@@ -1866,12 +1868,15 @@ int cifs_mkdir(struct user_namespace *mnt_userns, struct inode *inode,
struct tcon_link *tlink;
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
- char *full_path;
+ const char *full_path;
+ void *page;
cifs_dbg(FYI, "In cifs_mkdir, mode = %04ho inode = 0x%p\n",
mode, inode);
cifs_sb = CIFS_SB(inode->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
@@ -1879,9 +1884,10 @@ int cifs_mkdir(struct user_namespace *mnt_userns, struct inode *inode,
xid = get_xid();
- full_path = build_path_from_dentry(direntry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ page = alloc_dentry_path();
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto mkdir_out;
}
@@ -1924,7 +1930,7 @@ mkdir_out:
* attributes are invalid now.
*/
CIFS_I(inode)->time = 0;
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
return rc;
@@ -1938,20 +1944,26 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
struct tcon_link *tlink;
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
- char *full_path = NULL;
+ const char *full_path;
+ void *page = alloc_dentry_path();
struct cifsInodeInfo *cifsInode;
cifs_dbg(FYI, "cifs_rmdir, inode = 0x%p\n", inode);
xid = get_xid();
- full_path = build_path_from_dentry(direntry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto rmdir_exit;
}
cifs_sb = CIFS_SB(inode->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb))) {
+ rc = -EIO;
+ goto rmdir_exit;
+ }
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
rc = PTR_ERR(tlink);
@@ -1997,7 +2009,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
current_time(inode);
rmdir_exit:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
return rc;
}
@@ -2072,8 +2084,8 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
struct dentry *source_dentry, struct inode *target_dir,
struct dentry *target_dentry, unsigned int flags)
{
- char *from_name = NULL;
- char *to_name = NULL;
+ const char *from_name, *to_name;
+ void *page1, *page2;
struct cifs_sb_info *cifs_sb;
struct tcon_link *tlink;
struct cifs_tcon *tcon;
@@ -2086,29 +2098,31 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
return -EINVAL;
cifs_sb = CIFS_SB(source_dir->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
tcon = tlink_tcon(tlink);
+ page1 = alloc_dentry_path();
+ page2 = alloc_dentry_path();
xid = get_xid();
- /*
- * we already have the rename sem so we do not need to
- * grab it again here to protect the path integrity
- */
- from_name = build_path_from_dentry(source_dentry);
- if (from_name == NULL) {
- rc = -ENOMEM;
+ from_name = build_path_from_dentry(source_dentry, page1);
+ if (IS_ERR(from_name)) {
+ rc = PTR_ERR(from_name);
goto cifs_rename_exit;
}
- to_name = build_path_from_dentry(target_dentry);
- if (to_name == NULL) {
- rc = -ENOMEM;
+ to_name = build_path_from_dentry(target_dentry, page2);
+ if (IS_ERR(to_name)) {
+ rc = PTR_ERR(to_name);
goto cifs_rename_exit;
}
+ cifs_close_all_deferred_files(tcon);
rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
to_name);
@@ -2177,18 +2191,21 @@ unlink_target:
cifs_rename_exit:
kfree(info_buf_source);
- kfree(from_name);
- kfree(to_name);
+ free_dentry_path(page2);
+ free_dentry_path(page1);
free_xid(xid);
cifs_put_tlink(tlink);
return rc;
}
static bool
-cifs_inode_needs_reval(struct inode *inode)
+cifs_dentry_needs_reval(struct dentry *dentry)
{
+ struct inode *inode = d_inode(dentry);
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ struct cached_fid *cfid = NULL;
if (cifs_i->time == 0)
return true;
@@ -2199,6 +2216,16 @@ cifs_inode_needs_reval(struct inode *inode)
if (!lookupCacheEnabled)
return true;
+ if (!open_cached_dir_by_dentry(tcon, dentry->d_parent, &cfid)) {
+ mutex_lock(&cfid->fid_mutex);
+ if (cfid->time && cifs_i->time > cfid->time) {
+ mutex_unlock(&cfid->fid_mutex);
+ close_cached_dir(cfid);
+ return false;
+ }
+ mutex_unlock(&cfid->fid_mutex);
+ close_cached_dir(cfid);
+ }
/*
* depending on inode type, check if attribute caching disabled for
* files or directories
@@ -2297,10 +2324,10 @@ cifs_zap_mapping(struct inode *inode)
int cifs_revalidate_file_attr(struct file *filp)
{
int rc = 0;
- struct inode *inode = file_inode(filp);
+ struct dentry *dentry = file_dentry(filp);
struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
- if (!cifs_inode_needs_reval(inode))
+ if (!cifs_dentry_needs_reval(dentry))
return rc;
if (tlink_tcon(cfile->tlink)->unix_ext)
@@ -2317,22 +2344,22 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
int rc = 0;
struct inode *inode = d_inode(dentry);
struct super_block *sb = dentry->d_sb;
- char *full_path = NULL;
+ const char *full_path;
+ void *page;
int count = 0;
if (inode == NULL)
return -ENOENT;
- if (!cifs_inode_needs_reval(inode))
+ if (!cifs_dentry_needs_reval(dentry))
return rc;
xid = get_xid();
- /* can not safely grab the rename sem here if rename calls revalidate
- since that would deadlock */
- full_path = build_path_from_dentry(dentry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ page = alloc_dentry_path();
+ full_path = build_path_from_dentry(dentry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto out;
}
@@ -2351,7 +2378,7 @@ again:
if (rc == -EAGAIN && count++ < 10)
goto again;
out:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
return rc;
@@ -2391,6 +2418,9 @@ int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct inode *inode = d_inode(dentry);
int rc;
+ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
+ return -EIO;
+
/*
* We need to be sure that all dirty pages are written and the server
* has actual ctime, mtime and file length.
@@ -2463,6 +2493,9 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start,
struct cifsFileInfo *cfile;
int rc;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
/*
* We need to be sure that all dirty pages are written as they
* might fill holes on the server.
@@ -2522,7 +2555,7 @@ void cifs_setsize(struct inode *inode, loff_t offset)
static int
cifs_set_file_size(struct inode *inode, struct iattr *attrs,
- unsigned int xid, char *full_path)
+ unsigned int xid, const char *full_path)
{
int rc;
struct cifsFileInfo *open_file;
@@ -2613,7 +2646,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
{
int rc;
unsigned int xid;
- char *full_path = NULL;
+ const char *full_path;
+ void *page = alloc_dentry_path();
struct inode *inode = d_inode(direntry);
struct cifsInodeInfo *cifsInode = CIFS_I(inode);
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -2634,9 +2668,9 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
if (rc < 0)
goto out;
- full_path = build_path_from_dentry(direntry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto out;
}
@@ -2748,7 +2782,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
cifsInode->time = 0;
out:
kfree(args);
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
return rc;
}
@@ -2764,7 +2798,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
struct cifsInodeInfo *cifsInode = CIFS_I(inode);
struct cifsFileInfo *wfile;
struct cifs_tcon *tcon;
- char *full_path = NULL;
+ const char *full_path;
+ void *page = alloc_dentry_path();
int rc = -EACCES;
__u32 dosattr = 0;
__u64 mode = NO_CHANGE_64;
@@ -2778,16 +2813,13 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
attrs->ia_valid |= ATTR_FORCE;
rc = setattr_prepare(&init_user_ns, direntry, attrs);
- if (rc < 0) {
- free_xid(xid);
- return rc;
- }
+ if (rc < 0)
+ goto cifs_setattr_exit;
- full_path = build_path_from_dentry(direntry);
- if (full_path == NULL) {
- rc = -ENOMEM;
- free_xid(xid);
- return rc;
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
+ goto cifs_setattr_exit;
}
/*
@@ -2937,8 +2969,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
mark_inode_dirty(inode);
cifs_setattr_exit:
- kfree(full_path);
free_xid(xid);
+ free_dentry_path(page);
return rc;
}
@@ -2950,6 +2982,9 @@ cifs_setattr(struct user_namespace *mnt_userns, struct dentry *direntry,
struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
int rc, retries = 0;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
do {
if (pTcon->unix_ext)
rc = cifs_setattr_unix(direntry, attrs);
@@ -2961,12 +2996,3 @@ cifs_setattr(struct user_namespace *mnt_userns, struct dentry *direntry,
/* BB: add cifs_setattr_legacy for really old servers */
return rc;
}
-
-#if 0
-void cifs_delete_inode(struct inode *inode)
-{
- cifs_dbg(FYI, "In cifs_delete_inode, inode = 0x%p\n", inode);
- /* may have to add back in if and when safe distributed caching of
- directories added e.g. via FindNotify */
-}
-#endif
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index dcde44ff6cf9..d67d281ab863 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -33,6 +33,7 @@
#include "cifsfs.h"
#include "cifs_ioctl.h"
#include "smb2proto.h"
+#include "smb2glob.h"
#include <linux/btrfs.h>
static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
@@ -42,13 +43,16 @@ static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct dentry *dentry = filep->f_path.dentry;
- unsigned char *path;
+ const unsigned char *path;
+ void *page = alloc_dentry_path();
__le16 *utf16_path = NULL, root_path;
int rc = 0;
- path = build_path_from_dentry(dentry);
- if (path == NULL)
- return -ENOMEM;
+ path = build_path_from_dentry(dentry, page);
+ if (IS_ERR(path)) {
+ free_dentry_path(page);
+ return PTR_ERR(path);
+ }
cifs_dbg(FYI, "%s %s\n", __func__, path);
@@ -73,7 +77,7 @@ static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
ici_exit:
if (utf16_path != &root_path)
kfree(utf16_path);
- kfree(path);
+ free_dentry_path(page);
return rc;
}
@@ -161,6 +165,164 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+static int cifs_shutdown(struct super_block *sb, unsigned long arg)
+{
+ struct cifs_sb_info *sbi = CIFS_SB(sb);
+ __u32 flags;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(flags, (__u32 __user *)arg))
+ return -EFAULT;
+
+ if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH)
+ return -EINVAL;
+
+ if (cifs_forced_shutdown(sbi))
+ return 0;
+
+ cifs_dbg(VFS, "shut down requested (%d)", flags);
+/* trace_cifs_shutdown(sb, flags);*/
+
+ /*
+ * see:
+ * https://man7.org/linux/man-pages/man2/ioctl_xfs_goingdown.2.html
+ * for more information and description of original intent of the flags
+ */
+ switch (flags) {
+ /*
+ * We could add support later for default flag which requires:
+ * "Flush all dirty data and metadata to disk"
+ * would need to call syncfs or equivalent to flush page cache for
+ * the mount and then issue fsync to server (if nostrictsync not set)
+ */
+ case CIFS_GOING_FLAGS_DEFAULT:
+ cifs_dbg(FYI, "shutdown with default flag not supported\n");
+ return -EINVAL;
+ /*
+ * FLAGS_LOGFLUSH is easy since it asks to write out metadata (not
+ * data) but metadata writes are not cached on the client, so can treat
+ * it similarly to NOLOGFLUSH
+ */
+ case CIFS_GOING_FLAGS_LOGFLUSH:
+ case CIFS_GOING_FLAGS_NOLOGFLUSH:
+ sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in)
+{
+ struct smb3_full_key_debug_info out;
+ struct cifs_ses *ses;
+ int rc = 0;
+ bool found = false;
+ u8 __user *end;
+
+ if (!smb3_encryption_required(tcon)) {
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* copy user input into our output buffer */
+ if (copy_from_user(&out, in, sizeof(out))) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (!out.session_id) {
+ /* if ses id is 0, use current user session */
+ ses = tcon->ses;
+ } else {
+ /* otherwise if a session id is given, look for it in all our sessions */
+ struct cifs_ses *ses_it = NULL;
+ struct TCP_Server_Info *server_it = NULL;
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
+ list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
+ if (ses_it->Suid == out.session_id) {
+ ses = ses_it;
+ /*
+ * since we are using the session outside the crit
+ * section, we need to make sure it won't be released
+ * so increment its refcount
+ */
+ ses->ses_count++;
+ found = true;
+ goto search_end;
+ }
+ }
+ }
+search_end:
+ spin_unlock(&cifs_tcp_ses_lock);
+ if (!found) {
+ rc = -ENOENT;
+ goto out;
+ }
+ }
+
+ switch (ses->server->cipher_type) {
+ case SMB2_ENCRYPTION_AES128_CCM:
+ case SMB2_ENCRYPTION_AES128_GCM:
+ out.session_key_length = CIFS_SESS_KEY_SIZE;
+ out.server_in_key_length = out.server_out_key_length = SMB3_GCM128_CRYPTKEY_SIZE;
+ break;
+ case SMB2_ENCRYPTION_AES256_CCM:
+ case SMB2_ENCRYPTION_AES256_GCM:
+ out.session_key_length = CIFS_SESS_KEY_SIZE;
+ out.server_in_key_length = out.server_out_key_length = SMB3_GCM256_CRYPTKEY_SIZE;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* check if user buffer is big enough to store all the keys */
+ if (out.in_size < sizeof(out) + out.session_key_length + out.server_in_key_length
+ + out.server_out_key_length) {
+ rc = -ENOBUFS;
+ goto out;
+ }
+
+ out.session_id = ses->Suid;
+ out.cipher_type = le16_to_cpu(ses->server->cipher_type);
+
+ /* overwrite user input with our output */
+ if (copy_to_user(in, &out, sizeof(out))) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* append all the keys at the end of the user buffer */
+ end = in->data;
+ if (copy_to_user(end, ses->auth_key.response, out.session_key_length)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ end += out.session_key_length;
+
+ if (copy_to_user(end, ses->smb3encryptionkey, out.server_in_key_length)) {
+ rc = -EINVAL;
+ goto out;
+ }
+ end += out.server_in_key_length;
+
+ if (copy_to_user(end, ses->smb3decryptionkey, out.server_out_key_length)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+out:
+ if (found)
+ cifs_put_smb_ses(ses);
+ return rc;
+}
+
long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
{
struct inode *inode = file_inode(filep);
@@ -274,6 +436,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = -EOPNOTSUPP;
break;
case CIFS_DUMP_KEY:
+ /*
+ * Dump encryption keys. This is an old ioctl that only
+ * handles AES-128-{CCM,GCM}.
+ */
if (pSMBFile == NULL)
break;
if (!capable(CAP_SYS_ADMIN)) {
@@ -301,6 +467,19 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
else
rc = 0;
break;
+ case CIFS_DUMP_FULL_KEY:
+ /*
+ * Dump encryption keys (handles any key sizes)
+ */
+ if (pSMBFile == NULL)
+ break;
+ if (!capable(CAP_SYS_ADMIN)) {
+ rc = -EACCES;
+ break;
+ }
+ tcon = tlink_tcon(pSMBFile->tlink);
+ rc = cifs_dump_full_key(tcon, (void __user *)arg);
+ break;
case CIFS_IOC_NOTIFY:
if (!S_ISDIR(inode->i_mode)) {
/* Notify can only be done on directories */
@@ -322,6 +501,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = -EOPNOTSUPP;
cifs_put_tlink(tlink);
break;
+ case CIFS_IOC_SHUTDOWN:
+ rc = cifs_shutdown(inode->i_sb, arg);
+ break;
default:
cifs_dbg(FYI, "unsupported ioctl\n");
break;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 7c5878a645d9..970fcf2adb08 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -30,6 +30,7 @@
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
#include "smb2proto.h"
+#include "cifs_ioctl.h"
/*
* M-F Symlink Functions - Begin
@@ -510,25 +511,34 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
{
int rc = -EACCES;
unsigned int xid;
- char *from_name = NULL;
- char *to_name = NULL;
+ const char *from_name, *to_name;
+ void *page1, *page2;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct tcon_link *tlink;
struct cifs_tcon *tcon;
struct TCP_Server_Info *server;
struct cifsInodeInfo *cifsInode;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
tcon = tlink_tcon(tlink);
xid = get_xid();
+ page1 = alloc_dentry_path();
+ page2 = alloc_dentry_path();
- from_name = build_path_from_dentry(old_file);
- to_name = build_path_from_dentry(direntry);
- if ((from_name == NULL) || (to_name == NULL)) {
- rc = -ENOMEM;
+ from_name = build_path_from_dentry(old_file, page1);
+ if (IS_ERR(from_name)) {
+ rc = PTR_ERR(from_name);
+ goto cifs_hl_exit;
+ }
+ to_name = build_path_from_dentry(direntry, page2);
+ if (IS_ERR(to_name)) {
+ rc = PTR_ERR(to_name);
goto cifs_hl_exit;
}
@@ -587,8 +597,8 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
}
cifs_hl_exit:
- kfree(from_name);
- kfree(to_name);
+ free_dentry_path(page1);
+ free_dentry_path(page2);
free_xid(xid);
cifs_put_tlink(tlink);
return rc;
@@ -600,7 +610,8 @@ cifs_get_link(struct dentry *direntry, struct inode *inode,
{
int rc = -ENOMEM;
unsigned int xid;
- char *full_path = NULL;
+ const char *full_path;
+ void *page;
char *target_path = NULL;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct tcon_link *tlink = NULL;
@@ -620,11 +631,13 @@ cifs_get_link(struct dentry *direntry, struct inode *inode,
tcon = tlink_tcon(tlink);
server = tcon->ses->server;
- full_path = build_path_from_dentry(direntry);
- if (!full_path) {
+ page = alloc_dentry_path();
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
free_xid(xid);
cifs_put_tlink(tlink);
- return ERR_PTR(-ENOMEM);
+ free_dentry_path(page);
+ return ERR_CAST(full_path);
}
cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, inode);
@@ -649,7 +662,7 @@ cifs_get_link(struct dentry *direntry, struct inode *inode,
&target_path, reparse_point);
}
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
if (rc != 0) {
@@ -669,9 +682,17 @@ cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
- char *full_path = NULL;
+ const char *full_path;
+ void *page;
struct inode *newinode = NULL;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
+ page = alloc_dentry_path();
+ if (!page)
+ return -ENOMEM;
+
xid = get_xid();
tlink = cifs_sb_tlink(cifs_sb);
@@ -681,9 +702,9 @@ cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
}
pTcon = tlink_tcon(tlink);
- full_path = build_path_from_dentry(direntry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto symlink_exit;
}
@@ -719,7 +740,7 @@ cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
}
}
symlink_exit:
- kfree(full_path);
+ free_dentry_path(page);
cifs_put_tlink(tlink);
free_xid(xid);
return rc;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 82e176720ca6..7207a63819cb 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -672,6 +672,100 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
}
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ * As there is no reference count on cifs_deferred_close, pdclose
+ * should not be used outside deferred_lock.
+ */
+bool
+cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
+{
+ struct cifs_deferred_close *dclose;
+
+ list_for_each_entry(dclose, &CIFS_I(d_inode(cfile->dentry))->deferred_closes, dlist) {
+ if ((dclose->netfid == cfile->fid.netfid) &&
+ (dclose->persistent_fid == cfile->fid.persistent_fid) &&
+ (dclose->volatile_fid == cfile->fid.volatile_fid)) {
+ *pdclose = dclose;
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
+void
+cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *dclose)
+{
+ bool is_deferred = false;
+ struct cifs_deferred_close *pdclose;
+
+ is_deferred = cifs_is_deferred_close(cfile, &pdclose);
+ if (is_deferred) {
+ kfree(dclose);
+ return;
+ }
+
+ dclose->tlink = cfile->tlink;
+ dclose->netfid = cfile->fid.netfid;
+ dclose->persistent_fid = cfile->fid.persistent_fid;
+ dclose->volatile_fid = cfile->fid.volatile_fid;
+ list_add_tail(&dclose->dlist, &CIFS_I(d_inode(cfile->dentry))->deferred_closes);
+}
+
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
+void
+cifs_del_deferred_close(struct cifsFileInfo *cfile)
+{
+ bool is_deferred = false;
+ struct cifs_deferred_close *dclose;
+
+ is_deferred = cifs_is_deferred_close(cfile, &dclose);
+ if (!is_deferred)
+ return;
+ list_del(&dclose->dlist);
+ kfree(dclose);
+}
+
+void
+cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
+{
+ struct cifsFileInfo *cfile = NULL;
+ struct cifs_deferred_close *dclose;
+
+ list_for_each_entry(cfile, &cifs_inode->openFileList, flist) {
+ spin_lock(&cifs_inode->deferred_lock);
+ if (cifs_is_deferred_close(cfile, &dclose))
+ mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+ spin_unlock(&cifs_inode->deferred_lock);
+ }
+}
+
+void
+cifs_close_all_deferred_files(struct cifs_tcon *tcon)
+{
+ struct cifsFileInfo *cfile;
+ struct list_head *tmp;
+
+ spin_lock(&tcon->open_file_lock);
+ list_for_each(tmp, &tcon->openFileList) {
+ cfile = list_entry(tmp, struct cifsFileInfo, tlist);
+ if (delayed_work_pending(&cfile->deferred)) {
+ /*
+ * If there is no pending work, mod_delayed_work queues new work.
+ * So, Increase the ref count to avoid use-after-free.
+ */
+ if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
+ cifsFileInfo_get(cfile);
+ }
+ }
+ spin_unlock(&tcon->open_file_lock);
+}
+
/* parses DFS refferal V3 structure
* caller is responsible for freeing target_nodes
* returns:
@@ -1180,7 +1274,7 @@ int update_super_prepath(struct cifs_tcon *tcon, char *prefix)
kfree(cifs_sb->prepath);
if (prefix && *prefix) {
- cifs_sb->prepath = kstrndup(prefix, strlen(prefix), GFP_ATOMIC);
+ cifs_sb->prepath = kstrdup(prefix, GFP_ATOMIC);
if (!cifs_sb->prepath) {
rc = -ENOMEM;
goto out;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 80bf4c6f4c7b..63bfc533c9fb 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -119,9 +119,7 @@ retry:
/* update inode in place
* if both i_ino and i_mode didn't change */
if (CIFS_I(inode)->uniqueid == fattr->cf_uniqueid &&
- (inode->i_mode & S_IFMT) ==
- (fattr->cf_mode & S_IFMT)) {
- cifs_fattr_to_inode(inode, fattr);
+ cifs_fattr_to_inode(inode, fattr) == 0) {
dput(dentry);
return;
}
@@ -384,7 +382,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
static int
initiate_cifs_search(const unsigned int xid, struct file *file,
- char *full_path)
+ const char *full_path)
{
__u16 search_flags;
int rc = 0;
@@ -704,7 +702,7 @@ static int cifs_save_resume_key(const char *current_entry,
*/
static int
find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
- struct file *file, char *full_path,
+ struct file *file, const char *full_path,
char **current_entry, int *num_to_ret)
{
__u16 search_flags;
@@ -942,13 +940,14 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
char *tmp_buf = NULL;
char *end_of_smb;
unsigned int max_len;
- char *full_path = NULL;
+ const char *full_path;
+ void *page = alloc_dentry_path();
xid = get_xid();
- full_path = build_path_from_dentry(file_dentry(file));
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(file_dentry(file), page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto rddir2_exit;
}
@@ -1043,7 +1042,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
kfree(tmp_buf);
rddir2_exit:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
return rc;
}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 63d517b9f2ff..a92a1fb7cb52 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -97,6 +97,12 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
return 0;
}
+ if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
+ cifs_dbg(VFS, "server %s does not support multichannel\n", ses->server->hostname);
+ ses->chan_max = 1;
+ return 0;
+ }
+
/*
* Make a copy of the iface list at the time and use that
* instead so as to not hold the iface spinlock for opening
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index e31b939e628c..3b83839fc2c2 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -926,9 +926,7 @@ cifs_unix_dfs_readlink(const unsigned int xid, struct cifs_tcon *tcon,
0);
if (!rc) {
- *symlinkinfo = kstrndup(referral.node_name,
- strlen(referral.node_name),
- GFP_KERNEL);
+ *symlinkinfo = kstrdup(referral.node_name, GFP_KERNEL);
free_dfs_info_param(&referral);
if (!*symlinkinfo)
rc = -ENOMEM;
@@ -1027,7 +1025,7 @@ cifs_can_echo(struct TCP_Server_Info *server)
static int
cifs_make_node(unsigned int xid, struct inode *inode,
struct dentry *dentry, struct cifs_tcon *tcon,
- char *full_path, umode_t mode, dev_t dev)
+ const char *full_path, umode_t mode, dev_t dev)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct inode *newinode = NULL;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index a718dc77e604..9a61209a283e 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -512,7 +512,6 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
struct smb2_file_all_info *smb2_data;
__u32 create_options = 0;
- bool no_cached_open = tcon->nohandlecache;
struct cifsFileInfo *cfile;
struct cached_fid *cfid = NULL;
@@ -525,11 +524,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
return -ENOMEM;
/* If it is a root and its handle is cached then use it */
- if (!strlen(full_path) && !no_cached_open) {
- rc = open_shroot(xid, tcon, cifs_sb, &cfid);
- if (rc)
- goto out;
-
+ rc = open_cached_dir(xid, tcon, full_path, cifs_sb, &cfid);
+ if (!rc) {
if (tcon->crfid.file_all_info_is_valid) {
move_smb2_info_to_cifs(data,
&tcon->crfid.file_all_info);
@@ -540,7 +536,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
if (!rc)
move_smb2_info_to_cifs(data, smb2_data);
}
- close_shroot(cfid);
+ close_cached_dir(cfid);
goto out;
}
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index aac384f69f74..06d555d4da9a 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -667,6 +667,7 @@ smb2_is_valid_lease_break(char *buffer)
!memcmp(rsp->LeaseKey,
tcon->crfid.fid->lease_key,
SMB2_LEASE_KEY_SIZE)) {
+ tcon->crfid.time = 0;
INIT_WORK(&tcon->crfid.lease_break,
smb2_cached_lease_break);
queue_work(cifsiod_wq,
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f703204fb185..21ef51d338e0 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -690,17 +690,21 @@ smb2_close_cached_fid(struct kref *ref)
cfid->is_valid = false;
cfid->file_all_info_is_valid = false;
cfid->has_lease = false;
+ if (cfid->dentry) {
+ dput(cfid->dentry);
+ cfid->dentry = NULL;
+ }
}
}
-void close_shroot(struct cached_fid *cfid)
+void close_cached_dir(struct cached_fid *cfid)
{
mutex_lock(&cfid->fid_mutex);
kref_put(&cfid->refcount, smb2_close_cached_fid);
mutex_unlock(&cfid->fid_mutex);
}
-void close_shroot_lease_locked(struct cached_fid *cfid)
+void close_cached_dir_lease_locked(struct cached_fid *cfid)
{
if (cfid->has_lease) {
cfid->has_lease = false;
@@ -708,10 +712,10 @@ void close_shroot_lease_locked(struct cached_fid *cfid)
}
}
-void close_shroot_lease(struct cached_fid *cfid)
+void close_cached_dir_lease(struct cached_fid *cfid)
{
mutex_lock(&cfid->fid_mutex);
- close_shroot_lease_locked(cfid);
+ close_cached_dir_lease_locked(cfid);
mutex_unlock(&cfid->fid_mutex);
}
@@ -721,13 +725,15 @@ smb2_cached_lease_break(struct work_struct *work)
struct cached_fid *cfid = container_of(work,
struct cached_fid, lease_break);
- close_shroot_lease(cfid);
+ close_cached_dir_lease(cfid);
}
/*
- * Open the directory at the root of a share
+ * Open the and cache a directory handle.
+ * Only supported for the root handle.
*/
-int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
+int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
+ const char *path,
struct cifs_sb_info *cifs_sb,
struct cached_fid **cfid)
{
@@ -745,6 +751,18 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
__le16 utf16_path = 0; /* Null - since an open of top of share */
u8 oplock = SMB2_OPLOCK_LEVEL_II;
struct cifs_fid *pfid;
+ struct dentry *dentry;
+
+ if (tcon->nohandlecache)
+ return -ENOTSUPP;
+
+ if (cifs_sb->root == NULL)
+ return -ENOENT;
+
+ if (strlen(path))
+ return -ENOENT;
+
+ dentry = cifs_sb->root;
mutex_lock(&tcon->crfid.fid_mutex);
if (tcon->crfid.is_valid) {
@@ -830,11 +848,9 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
};
/*
- * caller expects this func to set pfid to a valid
- * cached root, so we copy the existing one and get a
- * reference.
+ * caller expects this func to set the fid in crfid to valid
+ * cached root, so increment the refcount.
*/
- memcpy(pfid, tcon->crfid.fid, sizeof(*pfid));
kref_get(&tcon->crfid.refcount);
mutex_unlock(&tcon->crfid.fid_mutex);
@@ -867,13 +883,18 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
oparms.fid->mid = le64_to_cpu(o_rsp->sync_hdr.MessageId);
#endif /* CIFS_DEBUG2 */
- memcpy(tcon->crfid.fid, pfid, sizeof(struct cifs_fid));
tcon->crfid.tcon = tcon;
tcon->crfid.is_valid = true;
+ tcon->crfid.dentry = dentry;
+ dget(dentry);
kref_init(&tcon->crfid.refcount);
/* BB TBD check to see if oplock level check can be removed below */
if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) {
+ /*
+ * See commit 2f94a3125b87. Increment the refcount when we
+ * get a lease for root, release it if lease break occurs
+ */
kref_get(&tcon->crfid.refcount);
tcon->crfid.has_lease = true;
smb2_parse_contexts(server, o_rsp,
@@ -892,6 +913,8 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
&rsp_iov[1], sizeof(struct smb2_file_all_info),
(char *)&tcon->crfid.file_all_info))
tcon->crfid.file_all_info_is_valid = true;
+ tcon->crfid.time = jiffies;
+
oshr_exit:
mutex_unlock(&tcon->crfid.fid_mutex);
@@ -905,6 +928,22 @@ oshr_free:
return rc;
}
+int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
+ struct dentry *dentry,
+ struct cached_fid **cfid)
+{
+ mutex_lock(&tcon->crfid.fid_mutex);
+ if (tcon->crfid.dentry == dentry) {
+ cifs_dbg(FYI, "found a cached root file handle by dentry\n");
+ *cfid = &tcon->crfid;
+ kref_get(&tcon->crfid.refcount);
+ mutex_unlock(&tcon->crfid.fid_mutex);
+ return 0;
+ }
+ mutex_unlock(&tcon->crfid.fid_mutex);
+ return -ENOENT;
+}
+
static void
smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb)
@@ -914,7 +953,6 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
- bool no_cached_open = tcon->nohandlecache;
struct cached_fid *cfid = NULL;
oparms.tcon = tcon;
@@ -924,14 +962,12 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
oparms.fid = &fid;
oparms.reconnect = false;
- if (no_cached_open) {
+ rc = open_cached_dir(xid, tcon, "", cifs_sb, &cfid);
+ if (rc == 0)
+ memcpy(&fid, cfid->fid, sizeof(struct cifs_fid));
+ else
rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL,
NULL, NULL);
- } else {
- rc = open_shroot(xid, tcon, cifs_sb, &cfid);
- if (rc == 0)
- memcpy(&fid, cfid->fid, sizeof(struct cifs_fid));
- }
if (rc)
return;
@@ -945,10 +981,10 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
FS_VOLUME_INFORMATION);
SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
- if (no_cached_open)
+ if (cfid == NULL)
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
else
- close_shroot(cfid);
+ close_cached_dir(cfid);
}
static void
@@ -1531,7 +1567,10 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon,
NULL, 0 /* no input */, CIFSMaxBufSize,
(char **)&res_key, &ret_data_len);
- if (rc) {
+ if (rc == -EOPNOTSUPP) {
+ pr_warn_once("Server share %s does not support copy range\n", tcon->treeName);
+ goto req_res_key_exit;
+ } else if (rc) {
cifs_tcon_dbg(VFS, "refcpy ioctl error %d getting resume key\n", rc);
goto req_res_key_exit;
}
@@ -1763,18 +1802,14 @@ smb2_ioctl_query_info(const unsigned int xid,
}
iqinf_exit:
- kfree(vars);
- kfree(buffer);
- SMB2_open_free(&rqst[0]);
- if (qi.flags & PASSTHRU_FSCTL)
- SMB2_ioctl_free(&rqst[1]);
- else
- SMB2_query_info_free(&rqst[1]);
-
- SMB2_close_free(&rqst[2]);
+ cifs_small_buf_release(rqst[0].rq_iov[0].iov_base);
+ cifs_small_buf_release(rqst[1].rq_iov[0].iov_base);
+ cifs_small_buf_release(rqst[2].rq_iov[0].iov_base);
free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+ kfree(vars);
+ kfree(buffer);
return rc;
e_fault:
@@ -1826,6 +1861,8 @@ smb2_copychunk_range(const unsigned int xid,
cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
+ kfree(retbuf);
+ retbuf = NULL;
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
true /* is_fsctl */, (char *)pcchunk,
@@ -2217,22 +2254,23 @@ smb3_notify(const unsigned int xid, struct file *pfile,
struct smb3_notify notify;
struct dentry *dentry = pfile->f_path.dentry;
struct inode *inode = file_inode(pfile);
- struct cifs_sb_info *cifs_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_open_parms oparms;
struct cifs_fid fid;
struct cifs_tcon *tcon;
- unsigned char *path = NULL;
+ const unsigned char *path;
+ void *page = alloc_dentry_path();
__le16 *utf16_path = NULL;
u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
int rc = 0;
- path = build_path_from_dentry(dentry);
- if (path == NULL)
- return -ENOMEM;
-
- cifs_sb = CIFS_SB(inode->i_sb);
+ path = build_path_from_dentry(dentry, page);
+ if (IS_ERR(path)) {
+ rc = PTR_ERR(path);
+ goto notify_exit;
+ }
- utf16_path = cifs_convert_path_to_utf16(path + 1, cifs_sb);
+ utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (utf16_path == NULL) {
rc = -ENOMEM;
goto notify_exit;
@@ -2264,7 +2302,7 @@ smb3_notify(const unsigned int xid, struct file *pfile,
cifs_dbg(FYI, "change notify for path %s rc %d\n", path, rc);
notify_exit:
- kfree(path);
+ free_dentry_path(page);
kfree(utf16_path);
return rc;
}
@@ -3652,6 +3690,77 @@ out:
return rc;
}
+static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon,
+ loff_t off, loff_t len)
+{
+ int rc;
+ unsigned int xid;
+ struct cifsFileInfo *cfile = file->private_data;
+ __le64 eof;
+
+ xid = get_xid();
+
+ if (off >= i_size_read(file->f_inode) ||
+ off + len >= i_size_read(file->f_inode)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ rc = smb2_copychunk_range(xid, cfile, cfile, off + len,
+ i_size_read(file->f_inode) - off - len, off);
+ if (rc < 0)
+ goto out;
+
+ eof = cpu_to_le64(i_size_read(file->f_inode) - len);
+ rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, cfile->pid, &eof);
+ if (rc < 0)
+ goto out;
+
+ rc = 0;
+ out:
+ free_xid(xid);
+ return rc;
+}
+
+static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon,
+ loff_t off, loff_t len)
+{
+ int rc;
+ unsigned int xid;
+ struct cifsFileInfo *cfile = file->private_data;
+ __le64 eof;
+ __u64 count;
+
+ xid = get_xid();
+
+ if (off >= i_size_read(file->f_inode)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ count = i_size_read(file->f_inode) - off;
+ eof = cpu_to_le64(i_size_read(file->f_inode) + len);
+
+ rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, cfile->pid, &eof);
+ if (rc < 0)
+ goto out;
+
+ rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len);
+ if (rc < 0)
+ goto out;
+
+ rc = smb3_zero_range(file, tcon, off, len, 1);
+ if (rc < 0)
+ goto out;
+
+ rc = 0;
+ out:
+ free_xid(xid);
+ return rc;
+}
+
static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offset, int whence)
{
struct cifsFileInfo *wrcfile, *cfile = file->private_data;
@@ -3823,6 +3932,10 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
return smb3_zero_range(file, tcon, off, len, false);
} else if (mode == FALLOC_FL_KEEP_SIZE)
return smb3_simple_falloc(file, tcon, off, len, true);
+ else if (mode == FALLOC_FL_COLLAPSE_RANGE)
+ return smb3_collapse_range(file, tcon, off, len);
+ else if (mode == FALLOC_FL_INSERT_RANGE)
+ return smb3_insert_range(file, tcon, off, len);
else if (mode == 0)
return smb3_simple_falloc(file, tcon, off, len, false);
@@ -3870,6 +3983,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
unsigned int epoch, bool *purge_cache)
{
oplock &= 0xFF;
+ cinode->lease_granted = false;
if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
return;
if (oplock == SMB2_OPLOCK_LEVEL_BATCH) {
@@ -3896,6 +4010,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
unsigned int new_oplock = 0;
oplock &= 0xFF;
+ cinode->lease_granted = true;
if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
return;
@@ -4178,7 +4293,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
}
spin_unlock(&cifs_tcp_ses_lock);
- return 1;
+ return -EAGAIN;
}
/*
* Encrypt or decrypt @rqst message. @rqst[0] has the following format:
@@ -4968,7 +5083,7 @@ smb2_next_header(char *buf)
static int
smb2_make_node(unsigned int xid, struct inode *inode,
struct dentry *dentry, struct cifs_tcon *tcon,
- char *full_path, umode_t mode, dev_t dev)
+ const char *full_path, umode_t mode, dev_t dev)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
int rc = -EPERM;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2199a9bfae8f..c205f93e0a10 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -841,6 +841,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
req->SecurityMode = 0;
req->Capabilities = cpu_to_le32(server->vals->req_capabilities);
+ if (ses->chan_max > 1)
+ req->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL);
/* ClientGUID must be zero for SMB2.02 dialect */
if (server->vals->protocol_id == SMB20_PROT_ID)
@@ -956,6 +958,13 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
/* Internal types */
server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES;
+ /*
+ * SMB3.0 supports only 1 cipher and doesn't have a encryption neg context
+ * Set the cipher type manually.
+ */
+ if (server->dialect == SMB30_PROT_ID && (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
+ server->cipher_type = SMB2_ENCRYPTION_AES128_CCM;
+
security_blob = smb2_get_data_area_len(&blob_offset, &blob_length,
(struct smb2_sync_hdr *)rsp);
/*
@@ -1032,6 +1041,9 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
pneg_inbuf->Capabilities =
cpu_to_le32(server->vals->req_capabilities);
+ if (tcon->ses->chan_max > 1)
+ pneg_inbuf->Capabilities |= cpu_to_le32(SMB2_GLOBAL_CAP_MULTI_CHANNEL);
+
memcpy(pneg_inbuf->Guid, server->client_guid,
SMB2_CLIENT_GUID_SIZE);
@@ -1857,7 +1869,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
return 0;
- close_shroot_lease(&tcon->crfid);
+ close_cached_dir_lease(&tcon->crfid);
rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, ses->server,
(void **) &req,
@@ -3895,10 +3907,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
* Related requests use info from previous read request
* in chain.
*/
- shdr->SessionId = 0xFFFFFFFF;
+ shdr->SessionId = 0xFFFFFFFFFFFFFFFF;
shdr->TreeId = 0xFFFFFFFF;
- req->PersistentFileId = 0xFFFFFFFF;
- req->VolatileFileId = 0xFFFFFFFF;
+ req->PersistentFileId = 0xFFFFFFFFFFFFFFFF;
+ req->VolatileFileId = 0xFFFFFFFFFFFFFFFF;
}
}
if (remaining_bytes > io_parms->length)
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index a5a9e33c0d73..6442dc1c292b 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -144,7 +144,7 @@ struct smb2_transform_hdr {
} __packed;
/* See MS-SMB2 2.2.42 */
-struct smb2_compression_transform_hdr {
+struct smb2_compression_transform_hdr_unchained {
__le32 ProtocolId; /* 0xFC 'S' 'M' 'B' */
__le32 OriginalCompressedSegmentSize;
__le16 CompressionAlgorithm;
@@ -160,10 +160,17 @@ struct compression_payload_header {
__le16 CompressionAlgorithm;
__le16 Flags;
__le32 Length; /* length of compressed playload including field below if present */
- /* __le32 OriginalPayloadSize; */ /* optional */
+ /* __le32 OriginalPayloadSize; */ /* optional, present when LZNT1, LZ77, LZ77+Huffman */
} __packed;
/* See MS-SMB2 2.2.42.2 */
+struct smb2_compression_transform_hdr_chained {
+ __le32 ProtocolId; /* 0xFC 'S' 'M' 'B' */
+ __le32 OriginalCompressedSegmentSize;
+ /* struct compression_payload_header[] */
+} __packed;
+
+/* See MS-SMB2 2.2.42.2.2 */
struct compression_pattern_payload_v1 {
__le16 Pattern;
__le16 Reserved1;
@@ -181,7 +188,11 @@ struct smb2_rdma_transform {
__le32 Reserved2;
} __packed;
-struct smb2_rdma_encryption_transform {
+/* TransformType */
+#define SMB2_RDMA_TRANSFORM_TYPE_ENCRYPTION 0x0001
+#define SMB2_RDMA_TRANSFORM_TYPE_SIGNING 0x0002
+
+struct smb2_rdma_crypto_transform {
__le16 TransformType;
__le16 SignatureLength;
__le16 NonceLength;
@@ -409,13 +420,29 @@ struct smb2_netname_neg_context {
} __packed;
/*
- * For rdma transform capabilities context see MS-SMB2 2.2.3.1.6
+ * For smb2_transport_capabilities context see MS-SMB2 2.2.3.1.5
* and 2.2.4.1.5
*/
+/* Flags */
+#define SMB2_ACCEPT_TRANSFORM_LEVEL_SECURITY 0x00000001
+
+struct smb2_transport_capabilities_context {
+ __le16 ContextType; /* 6 */
+ __le16 DataLength;
+ __u32 Reserved;
+ __le32 Flags;
+} __packed;
+
+/*
+ * For rdma transform capabilities context see MS-SMB2 2.2.3.1.6
+ * and 2.2.4.1.6
+ */
+
/* RDMA Transform IDs */
#define SMB2_RDMA_TRANSFORM_NONE 0x0000
#define SMB2_RDMA_TRANSFORM_ENCRYPTION 0x0001
+#define SMB2_RDMA_TRANSFORM_SIGNING 0x0002
struct smb2_rdma_transform_capabilities_context {
__le16 ContextType; /* 7 */
@@ -427,6 +454,11 @@ struct smb2_rdma_transform_capabilities_context {
__le16 RDMATransformIds[];
} __packed;
+/*
+ * For signing capabilities context see MS-SMB2 2.2.3.1.7
+ * and 2.2.4.1.7
+ */
+
/* Signing algorithms */
#define SIGNING_ALG_HMAC_SHA256 0
#define SIGNING_ALG_AES_CMAC 1
@@ -634,7 +666,8 @@ struct smb2_tree_connect_rsp {
#define SHI1005_FLAGS_ENABLE_HASH_V2 0x00004000
#define SHI1005_FLAGS_ENCRYPT_DATA 0x00008000
#define SMB2_SHAREFLAG_IDENTITY_REMOTING 0x00040000 /* 3.1.1 */
-#define SHI1005_FLAGS_ALL 0x0004FF33
+#define SMB2_SHAREFLAG_COMPRESS_DATA 0x00100000 /* 3.1.1 */
+#define SHI1005_FLAGS_ALL 0x0014FF33
/* Possible share capabilities */
#define SMB2_SHARE_CAP_DFS cpu_to_le32(0x00000008) /* all dialects */
@@ -1390,7 +1423,11 @@ struct smb2_lock_req {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 48 */
__le16 LockCount;
- __le32 Reserved;
+ /*
+ * The least significant four bits are the index, the other 28 bits are
+ * the lock sequence number (0 to 64). See MS-SMB2 2.2.26
+ */
+ __le32 LockSequenceNumber;
__u64 PersistentFileId; /* opaque endianness */
__u64 VolatileFileId; /* opaque endianness */
/* Followed by at least one */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index a2eb34a8d9c9..a5f87b02cfaf 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -69,12 +69,16 @@ extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
extern int smb3_handle_read_data(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
-extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb,
- struct cached_fid **cfid);
-extern void close_shroot(struct cached_fid *cfid);
-extern void close_shroot_lease(struct cached_fid *cfid);
-extern void close_shroot_lease_locked(struct cached_fid *cfid);
+extern int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
+ const char *path,
+ struct cifs_sb_info *cifs_sb,
+ struct cached_fid **cfid);
+extern int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
+ struct dentry *dentry,
+ struct cached_fid **cfid);
+extern void close_cached_dir(struct cached_fid *cfid);
+extern void close_cached_dir_lease(struct cached_fid *cfid);
+extern void close_cached_dir_lease_locked(struct cached_fid *cfid);
extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
struct smb2_file_all_info *src);
extern int smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index d6df908dccad..dafcb6ab050d 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -12,6 +12,11 @@
#include <linux/tracepoint.h>
+/*
+ * Please use this 3-part article as a reference for writing new tracepoints:
+ * https://lwn.net/Articles/379903/
+ */
+
/* For logging errors in read or write */
DECLARE_EVENT_CLASS(smb3_rw_err_class,
TP_PROTO(unsigned int xid,
@@ -529,16 +534,16 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class,
TP_ARGS(xid, func_name, rc),
TP_STRUCT__entry(
__field(unsigned int, xid)
- __field(const char *, func_name)
+ __string(func_name, func_name)
__field(int, rc)
),
TP_fast_assign(
__entry->xid = xid;
- __entry->func_name = func_name;
+ __assign_str(func_name, func_name);
__entry->rc = rc;
),
TP_printk("\t%s: xid=%u rc=%d",
- __entry->func_name, __entry->xid, __entry->rc)
+ __get_str(func_name), __entry->xid, __entry->rc)
)
#define DEFINE_SMB3_EXIT_ERR_EVENT(name) \
@@ -583,14 +588,14 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class,
TP_ARGS(xid, func_name),
TP_STRUCT__entry(
__field(unsigned int, xid)
- __field(const char *, func_name)
+ __string(func_name, func_name)
),
TP_fast_assign(
__entry->xid = xid;
- __entry->func_name = func_name;
+ __assign_str(func_name, func_name);
),
TP_printk("\t%s: xid=%u",
- __entry->func_name, __entry->xid)
+ __get_str(func_name), __entry->xid)
)
#define DEFINE_SMB3_ENTER_EXIT_EVENT(name) \
@@ -857,16 +862,16 @@ DECLARE_EVENT_CLASS(smb3_reconnect_class,
TP_STRUCT__entry(
__field(__u64, currmid)
__field(__u64, conn_id)
- __field(char *, hostname)
+ __string(hostname, hostname)
),
TP_fast_assign(
__entry->currmid = currmid;
__entry->conn_id = conn_id;
- __entry->hostname = hostname;
+ __assign_str(hostname, hostname);
),
TP_printk("conn_id=0x%llx server=%s current_mid=%llu",
__entry->conn_id,
- __entry->hostname,
+ __get_str(hostname),
__entry->currmid)
)
@@ -891,7 +896,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
TP_STRUCT__entry(
__field(__u64, currmid)
__field(__u64, conn_id)
- __field(char *, hostname)
+ __string(hostname, hostname)
__field(int, credits)
__field(int, credits_to_add)
__field(int, in_flight)
@@ -899,7 +904,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
TP_fast_assign(
__entry->currmid = currmid;
__entry->conn_id = conn_id;
- __entry->hostname = hostname;
+ __assign_str(hostname, hostname);
__entry->credits = credits;
__entry->credits_to_add = credits_to_add;
__entry->in_flight = in_flight;
@@ -907,7 +912,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
TP_printk("conn_id=0x%llx server=%s current_mid=%llu "
"credits=%d credit_change=%d in_flight=%d",
__entry->conn_id,
- __entry->hostname,
+ __get_str(hostname),
__entry->currmid,
__entry->credits,
__entry->credits_to_add,
diff --git a/fs/cifs/unc.c b/fs/cifs/unc.c
index 394aa00cea40..f6fc5e343ea4 100644
--- a/fs/cifs/unc.c
+++ b/fs/cifs/unc.c
@@ -50,7 +50,6 @@ char *extract_sharename(const char *unc)
{
const char *src;
char *delim, *dst;
- int len;
/* skip double chars at the beginning */
src = unc + 2;
@@ -60,10 +59,9 @@ char *extract_sharename(const char *unc)
if (!delim)
return ERR_PTR(-EINVAL);
delim++;
- len = strlen(delim);
/* caller has to free the memory */
- dst = kstrndup(delim, len, GFP_KERNEL);
+ dst = kstrdup(delim, GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 41a611e76bb7..aa3e8ca0457c 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -30,6 +30,7 @@
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
+#include "cifs_ioctl.h"
#define MAX_EA_VALUE_SIZE CIFSMaxBufSize
#define CIFS_XATTR_CIFS_ACL "system.cifs_acl" /* DACL only */
@@ -53,7 +54,7 @@ enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT,
XATTR_CIFS_NTSD, XATTR_CIFS_NTSD_FULL };
static int cifs_attrib_set(unsigned int xid, struct cifs_tcon *pTcon,
- struct inode *inode, char *full_path,
+ struct inode *inode, const char *full_path,
const void *value, size_t size)
{
ssize_t rc = -EOPNOTSUPP;
@@ -77,7 +78,7 @@ static int cifs_attrib_set(unsigned int xid, struct cifs_tcon *pTcon,
}
static int cifs_creation_time_set(unsigned int xid, struct cifs_tcon *pTcon,
- struct inode *inode, char *full_path,
+ struct inode *inode, const char *full_path,
const void *value, size_t size)
{
ssize_t rc = -EOPNOTSUPP;
@@ -112,7 +113,8 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
- char *full_path;
+ const char *full_path;
+ void *page;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
@@ -120,10 +122,11 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
pTcon = tlink_tcon(tlink);
xid = get_xid();
+ page = alloc_dentry_path();
- full_path = build_path_from_dentry(dentry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(dentry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto out;
}
/* return dos attributes as pseudo xattr */
@@ -235,7 +238,7 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
}
out:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
return rc;
@@ -297,7 +300,8 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
- char *full_path;
+ const char *full_path;
+ void *page;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
@@ -305,10 +309,11 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
pTcon = tlink_tcon(tlink);
xid = get_xid();
+ page = alloc_dentry_path();
- full_path = build_path_from_dentry(dentry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(dentry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto out;
}
@@ -401,7 +406,7 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
rc = -EOPNOTSUPP;
out:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
return rc;
@@ -414,7 +419,11 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
- char *full_path;
+ const char *full_path;
+ void *page;
+
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
return -EOPNOTSUPP;
@@ -425,10 +434,11 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
pTcon = tlink_tcon(tlink);
xid = get_xid();
+ page = alloc_dentry_path();
- full_path = build_path_from_dentry(direntry);
- if (full_path == NULL) {
- rc = -ENOMEM;
+ full_path = build_path_from_dentry(direntry, page);
+ if (IS_ERR(full_path)) {
+ rc = PTR_ERR(full_path);
goto list_ea_exit;
}
/* return dos attributes as pseudo xattr */
@@ -442,7 +452,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
full_path, NULL, data, buf_size, cifs_sb);
list_ea_exit:
- kfree(full_path);
+ free_dentry_path(page);
free_xid(xid);
cifs_put_tlink(tlink);
return rc;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 128d63df5bfb..ef5ca22bfb3e 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -175,10 +175,10 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma)
ret = call_mmap(vma->vm_file, vma);
if (ret) {
- /* if call_mmap fails, our caller will put coda_file so we
- * should drop the reference to the host_file that we got.
+ /* if call_mmap fails, our caller will put host_file so we
+ * should drop the reference to the coda_file that we got.
*/
- fput(host_file);
+ fput(coda_file);
kfree(cvm_ops);
} else {
/* here we add redirects for the open/close vm_operations */
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 9a3aed249692..c0395363eab9 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset:8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* configfs_internal.h - Internal stuff for configfs
*
* Based on sysfs:
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index b6098e02e20b..ac5e0c0e9181 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dir.c - Operations for configfs directories.
*
* Based on sysfs:
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index da8351d1e455..e26060dae70a 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* file.c - operations for regular (text) files.
*
* Based on sysfs:
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 42c348bb2903..eb5ec3e46283 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* inode.c - basic inode and dentry operations.
*
* Based on sysfs:
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 704a4356f137..254170a82aa3 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* item.c - library routines for handling generic config items
*
* Based on kobject:
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 0c6e8cf61953..c2d820063ec4 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* mount.c - operations for initializing and mounting configfs.
*
* Based on sysfs:
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 77c854364e60..0623c3edcfb9 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* symlink.c - operations for configfs symlinks.
*
* Based on sysfs:
diff --git a/fs/coredump.c b/fs/coredump.c
index 1c0fdc1aa70b..2868e3e171ae 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -809,6 +809,16 @@ void do_coredump(const kernel_siginfo_t *siginfo)
}
file_start_write(cprm.file);
core_dumped = binfmt->core_dump(&cprm);
+ /*
+ * Ensures that file size is big enough to contain the current
+ * file postion. This prevents gdb from complaining about
+ * a truncated file if the last "write" to the file was
+ * dump_skip.
+ */
+ if (cprm.to_skip) {
+ cprm.to_skip--;
+ dump_emit(&cprm, "", 1);
+ }
file_end_write(cprm.file);
}
if (ispipe && core_pipe_limit)
@@ -835,7 +845,7 @@ fail:
* do on a core-file: use only these functions to write out all the
* necessary info.
*/
-int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
+static int __dump_emit(struct coredump_params *cprm, const void *addr, int nr)
{
struct file *file = cprm->file;
loff_t pos = file->f_pos;
@@ -855,9 +865,8 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
return 1;
}
-EXPORT_SYMBOL(dump_emit);
-int dump_skip(struct coredump_params *cprm, size_t nr)
+static int __dump_skip(struct coredump_params *cprm, size_t nr)
{
static char zeroes[PAGE_SIZE];
struct file *file = cprm->file;
@@ -869,13 +878,35 @@ int dump_skip(struct coredump_params *cprm, size_t nr)
return 1;
} else {
while (nr > PAGE_SIZE) {
- if (!dump_emit(cprm, zeroes, PAGE_SIZE))
+ if (!__dump_emit(cprm, zeroes, PAGE_SIZE))
return 0;
nr -= PAGE_SIZE;
}
- return dump_emit(cprm, zeroes, nr);
+ return __dump_emit(cprm, zeroes, nr);
}
}
+
+int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
+{
+ if (cprm->to_skip) {
+ if (!__dump_skip(cprm, cprm->to_skip))
+ return 0;
+ cprm->to_skip = 0;
+ }
+ return __dump_emit(cprm, addr, nr);
+}
+EXPORT_SYMBOL(dump_emit);
+
+void dump_skip_to(struct coredump_params *cprm, unsigned long pos)
+{
+ cprm->to_skip = pos - cprm->pos;
+}
+EXPORT_SYMBOL(dump_skip_to);
+
+void dump_skip(struct coredump_params *cprm, size_t nr)
+{
+ cprm->to_skip += nr;
+}
EXPORT_SYMBOL(dump_skip);
#ifdef CONFIG_ELF_CORE
@@ -902,11 +933,11 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
kunmap_local(kaddr);
put_page(page);
+ if (stop)
+ return 0;
} else {
- stop = !dump_skip(cprm, PAGE_SIZE);
+ dump_skip(cprm, PAGE_SIZE);
}
- if (stop)
- return 0;
}
return 1;
}
@@ -914,33 +945,16 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
int dump_align(struct coredump_params *cprm, int align)
{
- unsigned mod = cprm->pos & (align - 1);
+ unsigned mod = (cprm->pos + cprm->to_skip) & (align - 1);
if (align & (align - 1))
return 0;
- return mod ? dump_skip(cprm, align - mod) : 1;
+ if (mod)
+ cprm->to_skip += align - mod;
+ return 1;
}
EXPORT_SYMBOL(dump_align);
/*
- * Ensures that file size is big enough to contain the current file
- * postion. This prevents gdb from complaining about a truncated file
- * if the last "write" to the file was dump_skip.
- */
-void dump_truncate(struct coredump_params *cprm)
-{
- struct file *file = cprm->file;
- loff_t offset;
-
- if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- offset = file->f_op->llseek(file, 0, SEEK_CUR);
- if (i_size_read(file->f_mapping->host) < offset)
- do_truncate(file_mnt_user_ns(file), file->f_path.dentry,
- offset, 0, file);
- }
-}
-EXPORT_SYMBOL(dump_truncate);
-
-/*
* The purpose of always_dump_vma() is to make sure that special kernel mappings
* that are useful for post-mortem analysis are included in every core dump.
* In that way we ensure that the core dump is fully interpretable later
diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
index a5f5c30368a2..2d0c8922f635 100644
--- a/fs/crypto/Kconfig
+++ b/fs/crypto/Kconfig
@@ -14,16 +14,30 @@ config FS_ENCRYPTION
F2FS and UBIFS make use of this feature.
# Filesystems supporting encryption must select this if FS_ENCRYPTION. This
-# allows the algorithms to be built as modules when all the filesystems are.
+# allows the algorithms to be built as modules when all the filesystems are,
+# whereas selecting them from FS_ENCRYPTION would force them to be built-in.
+#
+# Note: this option only pulls in the algorithms that filesystem encryption
+# needs "by default". If userspace will use "non-default" encryption modes such
+# as Adiantum encryption, then those other modes need to be explicitly enabled
+# in the crypto API; see Documentation/filesystems/fscrypt.rst for details.
+#
+# Also note that this option only pulls in the generic implementations of the
+# algorithms, not any per-architecture optimized implementations. It is
+# strongly recommended to enable optimized implementations too. It is safe to
+# disable these generic implementations if corresponding optimized
+# implementations will always be available too; for this reason, these are soft
+# dependencies ('imply' rather than 'select'). Only disable these generic
+# implementations if you're sure they will never be needed, though.
config FS_ENCRYPTION_ALGS
tristate
- select CRYPTO_AES
- select CRYPTO_CBC
- select CRYPTO_CTS
- select CRYPTO_ECB
- select CRYPTO_HMAC
- select CRYPTO_SHA512
- select CRYPTO_XTS
+ imply CRYPTO_AES
+ imply CRYPTO_CBC
+ imply CRYPTO_CTS
+ imply CRYPTO_ECB
+ imply CRYPTO_HMAC
+ imply CRYPTO_SHA512
+ imply CRYPTO_XTS
config FS_ENCRYPTION_INLINE_CRYPT
bool "Enable fscrypt to use inline crypto"
diff --git a/fs/d_path.c b/fs/d_path.c
index a69e2cd36e6e..270d62133996 100644
--- a/fs/d_path.c
+++ b/fs/d_path.c
@@ -326,9 +326,9 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
/*
* Write full pathname from the root of the filesystem into the buffer.
*/
-static char *__dentry_path(struct dentry *d, char *buf, int buflen)
+static char *__dentry_path(const struct dentry *d, char *buf, int buflen)
{
- struct dentry *dentry;
+ const struct dentry *dentry;
char *end, *retval;
int len, seq = 0;
int error = 0;
@@ -347,7 +347,7 @@ restart:
*retval = '/';
read_seqbegin_or_lock(&rename_lock, &seq);
while (!IS_ROOT(dentry)) {
- struct dentry *parent = dentry->d_parent;
+ const struct dentry *parent = dentry->d_parent;
prefetch(parent);
error = prepend_name(&end, &len, &dentry->d_name);
@@ -371,13 +371,13 @@ Elong:
return ERR_PTR(-ENAMETOOLONG);
}
-char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
+char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
{
return __dentry_path(dentry, buf, buflen);
}
EXPORT_SYMBOL(dentry_path_raw);
-char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+char *dentry_path(const struct dentry *dentry, char *buf, int buflen)
{
char *p = NULL;
char *retval;
diff --git a/fs/dax.c b/fs/dax.c
index b3d27fdc6775..62352cbcf0f4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue {
struct exceptional_entry_key key;
};
+/**
+ * enum dax_wake_mode: waitqueue wakeup behaviour
+ * @WAKE_ALL: wake all waiters in the waitqueue
+ * @WAKE_NEXT: wake only the first waiter in the waitqueue
+ */
+enum dax_wake_mode {
+ WAKE_ALL,
+ WAKE_NEXT,
+};
+
static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
void *entry, struct exceptional_entry_key *key)
{
@@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
* The important information it's conveying is whether the entry at
* this index used to be a PMD entry.
*/
-static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+static void dax_wake_entry(struct xa_state *xas, void *entry,
+ enum dax_wake_mode mode)
{
struct exceptional_entry_key key;
wait_queue_head_t *wq;
@@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
* must be in the waitqueue and the following check will see them.
*/
if (waitqueue_active(wq))
- __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
+ __wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key);
}
/*
@@ -264,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
finish_wait(wq, &ewait.wait);
}
-static void put_unlocked_entry(struct xa_state *xas, void *entry)
+static void put_unlocked_entry(struct xa_state *xas, void *entry,
+ enum dax_wake_mode mode)
{
- /* If we were the only waiter woken, wake the next one */
if (entry && !dax_is_conflict(entry))
- dax_wake_entry(xas, entry, false);
+ dax_wake_entry(xas, entry, mode);
}
/*
@@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
old = xas_store(xas, entry);
xas_unlock_irq(xas);
BUG_ON(!dax_is_locked(old));
- dax_wake_entry(xas, entry, false);
+ dax_wake_entry(xas, entry, WAKE_NEXT);
}
/*
@@ -524,8 +535,8 @@ retry:
dax_disassociate_entry(entry, mapping, false);
xas_store(xas, NULL); /* undo the PMD join */
- dax_wake_entry(xas, entry, true);
- mapping->nrexceptional--;
+ dax_wake_entry(xas, entry, WAKE_ALL);
+ mapping->nrpages -= PG_PMD_NR;
entry = NULL;
xas_set(xas, index);
}
@@ -541,7 +552,7 @@ retry:
dax_lock_entry(xas, entry);
if (xas_error(xas))
goto out_unlock;
- mapping->nrexceptional++;
+ mapping->nrpages += 1UL << order;
}
out_unlock:
@@ -622,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
entry = get_unlocked_entry(&xas, 0);
if (entry)
page = dax_busy_page(entry);
- put_unlocked_entry(&xas, entry);
+ put_unlocked_entry(&xas, entry, WAKE_NEXT);
if (page)
break;
if (++scanned % XA_CHECK_SCHED)
@@ -661,10 +672,10 @@ static int __dax_invalidate_entry(struct address_space *mapping,
goto out;
dax_disassociate_entry(entry, mapping, trunc);
xas_store(&xas, NULL);
- mapping->nrexceptional--;
+ mapping->nrpages -= 1UL << dax_entry_order(entry);
ret = 1;
out:
- put_unlocked_entry(&xas, entry);
+ put_unlocked_entry(&xas, entry, WAKE_ALL);
xas_unlock_irq(&xas);
return ret;
}
@@ -937,13 +948,13 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
xas_lock_irq(xas);
xas_store(xas, entry);
xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
- dax_wake_entry(xas, entry, false);
+ dax_wake_entry(xas, entry, WAKE_NEXT);
trace_dax_writeback_one(mapping->host, index, count);
return ret;
put_unlocked:
- put_unlocked_entry(xas, entry);
+ put_unlocked_entry(xas, entry, WAKE_NEXT);
return ret;
}
@@ -965,7 +976,7 @@ int dax_writeback_mapping_range(struct address_space *mapping,
if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
return -EIO;
- if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
+ if (mapping_empty(mapping) || wbc->sync_mode != WB_SYNC_ALL)
return 0;
trace_dax_writeback_range(inode, xas.xa_index, end_index);
@@ -1684,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
/* Did we race with someone splitting entry or so? */
if (!entry || dax_is_conflict(entry) ||
(order == 0 && !dax_is_pte_entry(entry))) {
- put_unlocked_entry(&xas, entry);
+ put_unlocked_entry(&xas, entry, WAKE_NEXT);
xas_unlock_irq(&xas);
trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
VM_FAULT_NOPAGE);
diff --git a/fs/dcache.c b/fs/dcache.c
index 7d24ff7eb206..cf871a81f4fd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -84,6 +84,8 @@ const struct qstr empty_name = QSTR_INIT("", 0);
EXPORT_SYMBOL(empty_name);
const struct qstr slash_name = QSTR_INIT("/", 1);
EXPORT_SYMBOL(slash_name);
+const struct qstr dotdot_name = QSTR_INIT("..", 2);
+EXPORT_SYMBOL(dotdot_name);
/*
* This is the single most critical data structure when it comes
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 686e0ad28788..e813acfaa6e8 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -773,7 +773,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_atomic_t);
ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
- char buf[3];
+ char buf[2];
bool val;
int r;
struct dentry *dentry = F_DENTRY(file);
@@ -789,7 +789,6 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
else
buf[0] = 'N';
buf[1] = '\n';
- buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
EXPORT_SYMBOL_GPL(debugfs_read_file_bool);
@@ -865,6 +864,97 @@ struct dentry *debugfs_create_bool(const char *name, umode_t mode,
}
EXPORT_SYMBOL_GPL(debugfs_create_bool);
+ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct dentry *dentry = F_DENTRY(file);
+ char *str, *copy = NULL;
+ int copy_len, len;
+ ssize_t ret;
+
+ ret = debugfs_file_get(dentry);
+ if (unlikely(ret))
+ return ret;
+
+ str = *(char **)file->private_data;
+ len = strlen(str) + 1;
+ copy = kmalloc(len, GFP_KERNEL);
+ if (!copy) {
+ debugfs_file_put(dentry);
+ return -ENOMEM;
+ }
+
+ copy_len = strscpy(copy, str, len);
+ debugfs_file_put(dentry);
+ if (copy_len < 0) {
+ kfree(copy);
+ return copy_len;
+ }
+
+ copy[copy_len] = '\n';
+
+ ret = simple_read_from_buffer(user_buf, count, ppos, copy, copy_len);
+ kfree(copy);
+
+ return ret;
+}
+
+static ssize_t debugfs_write_file_str(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ /* This is really only for read-only strings */
+ return -EINVAL;
+}
+
+static const struct file_operations fops_str = {
+ .read = debugfs_read_file_str,
+ .write = debugfs_write_file_str,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static const struct file_operations fops_str_ro = {
+ .read = debugfs_read_file_str,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static const struct file_operations fops_str_wo = {
+ .write = debugfs_write_file_str,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+/**
+ * debugfs_create_str - create a debugfs file that is used to read and write a string value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is %NULL, then the
+ * file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read to and write
+ * from.
+ *
+ * This function creates a file in debugfs with the given name that
+ * contains the value of the variable @value. If the @mode variable is so
+ * set, it can be read from, and written to.
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the debugfs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be
+ * returned.
+ *
+ * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will
+ * be returned.
+ */
+void debugfs_create_str(const char *name, umode_t mode,
+ struct dentry *parent, char **value)
+{
+ debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str,
+ &fops_str_ro, &fops_str_wo);
+}
+
static ssize_t read_file_blob(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 22e86ae4dd5a..8129a430d789 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -35,7 +35,7 @@
static struct vfsmount *debugfs_mount;
static int debugfs_mount_count;
static bool debugfs_registered;
-static unsigned int debugfs_allow = DEFAULT_DEBUGFS_ALLOW_BITS;
+static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS;
/*
* Don't allow access attributes to be changed whilst the kernel is locked down
@@ -45,10 +45,13 @@ static unsigned int debugfs_allow = DEFAULT_DEBUGFS_ALLOW_BITS;
static int debugfs_setattr(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *ia)
{
- int ret = security_locked_down(LOCKDOWN_DEBUGFS);
+ int ret;
- if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
- return ret;
+ if (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) {
+ ret = security_locked_down(LOCKDOWN_DEBUGFS);
+ if (ret)
+ return ret;
+ }
return simple_setattr(&init_user_ns, dentry, ia);
}
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 49c5f9407098..88d95d96e36c 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -125,7 +125,7 @@ static ssize_t cluster_cluster_name_store(struct config_item *item,
CONFIGFS_ATTR(cluster_, cluster_name);
static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
- int *info_field, bool (*check_cb)(unsigned int x),
+ int *info_field, int (*check_cb)(unsigned int x),
const char *buf, size_t len)
{
unsigned int x;
@@ -137,8 +137,11 @@ static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field,
if (rc)
return rc;
- if (check_cb && check_cb(x))
- return -EINVAL;
+ if (check_cb) {
+ rc = check_cb(x);
+ if (rc)
+ return rc;
+ }
*cl_field = x;
*info_field = x;
@@ -161,17 +164,53 @@ static ssize_t cluster_##name##_show(struct config_item *item, char *buf) \
} \
CONFIGFS_ATTR(cluster_, name);
-static bool dlm_check_zero(unsigned int x)
+static int dlm_check_protocol_and_dlm_running(unsigned int x)
+{
+ switch (x) {
+ case 0:
+ /* TCP */
+ break;
+ case 1:
+ /* SCTP */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (dlm_allow_conn)
+ return -EBUSY;
+
+ return 0;
+}
+
+static int dlm_check_zero_and_dlm_running(unsigned int x)
+{
+ if (!x)
+ return -EINVAL;
+
+ if (dlm_allow_conn)
+ return -EBUSY;
+
+ return 0;
+}
+
+static int dlm_check_zero(unsigned int x)
{
- return !x;
+ if (!x)
+ return -EINVAL;
+
+ return 0;
}
-static bool dlm_check_buffer_size(unsigned int x)
+static int dlm_check_buffer_size(unsigned int x)
{
- return (x < DEFAULT_BUFFER_SIZE);
+ if (x < DEFAULT_BUFFER_SIZE)
+ return -EINVAL;
+
+ return 0;
}
-CLUSTER_ATTR(tcp_port, dlm_check_zero);
+CLUSTER_ATTR(tcp_port, dlm_check_zero_and_dlm_running);
CLUSTER_ATTR(buffer_size, dlm_check_buffer_size);
CLUSTER_ATTR(rsbtbl_size, dlm_check_zero);
CLUSTER_ATTR(recover_timer, dlm_check_zero);
@@ -179,7 +218,7 @@ CLUSTER_ATTR(toss_secs, dlm_check_zero);
CLUSTER_ATTR(scan_secs, dlm_check_zero);
CLUSTER_ATTR(log_debug, NULL);
CLUSTER_ATTR(log_info, NULL);
-CLUSTER_ATTR(protocol, NULL);
+CLUSTER_ATTR(protocol, dlm_check_protocol_and_dlm_running);
CLUSTER_ATTR(mark, NULL);
CLUSTER_ATTR(timewarn_cs, dlm_check_zero);
CLUSTER_ATTR(waitwarn_us, NULL);
@@ -688,6 +727,7 @@ static ssize_t comm_mark_show(struct config_item *item, char *buf)
static ssize_t comm_mark_store(struct config_item *item, const char *buf,
size_t len)
{
+ struct dlm_comm *comm;
unsigned int mark;
int rc;
@@ -695,7 +735,15 @@ static ssize_t comm_mark_store(struct config_item *item, const char *buf,
if (rc)
return rc;
- config_item_to_comm(item)->mark = mark;
+ if (mark == 0)
+ mark = dlm_config.ci_mark;
+
+ comm = config_item_to_comm(item);
+ rc = dlm_lowcomms_nodes_set_mark(comm->nodeid, mark);
+ if (rc)
+ return rc;
+
+ comm->mark = mark;
return len;
}
@@ -870,24 +918,6 @@ int dlm_comm_seq(int nodeid, uint32_t *seq)
return 0;
}
-void dlm_comm_mark(int nodeid, unsigned int *mark)
-{
- struct dlm_comm *cm;
-
- cm = get_comm(nodeid);
- if (!cm) {
- *mark = dlm_config.ci_mark;
- return;
- }
-
- if (cm->mark)
- *mark = cm->mark;
- else
- *mark = dlm_config.ci_mark;
-
- put_comm(cm);
-}
-
int dlm_our_nodeid(void)
{
return local_comm ? local_comm->nodeid : 0;
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index c210250a2581..d2cd4bd20313 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -48,7 +48,6 @@ void dlm_config_exit(void);
int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out,
int *count_out);
int dlm_comm_seq(int nodeid, uint32_t *seq);
-void dlm_comm_mark(int nodeid, unsigned int *mark);
int dlm_our_nodeid(void);
int dlm_our_addr(struct sockaddr_storage *addr, int num);
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index d6bbccb0ed15..d5bd990bcab8 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -542,6 +542,7 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
if (bucket >= ls->ls_rsbtbl_size) {
kfree(ri);
+ ++*pos;
return NULL;
}
tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 002123efc6b0..b93df39d0915 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3541,8 +3541,6 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
if (!mh)
return -ENOBUFS;
- memset(mb, 0, mb_len);
-
ms = (struct dlm_message *) mb;
ms->m_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 561dcad08ad6..c14cf2b7faab 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -404,12 +404,6 @@ static int threads_start(void)
return error;
}
-static void threads_stop(void)
-{
- dlm_scand_stop();
- dlm_lowcomms_stop();
-}
-
static int new_lockspace(const char *name, const char *cluster,
uint32_t flags, int lvblen,
const struct dlm_lockspace_ops *ops, void *ops_arg,
@@ -702,8 +696,11 @@ int dlm_new_lockspace(const char *name, const char *cluster,
ls_count++;
if (error > 0)
error = 0;
- if (!ls_count)
- threads_stop();
+ if (!ls_count) {
+ dlm_scand_stop();
+ dlm_lowcomms_shutdown();
+ dlm_lowcomms_stop();
+ }
out:
mutex_unlock(&ls_lock);
return error;
@@ -788,6 +785,11 @@ static int release_lockspace(struct dlm_ls *ls, int force)
dlm_recoverd_stop(ls);
+ if (ls_count == 1) {
+ dlm_scand_stop();
+ dlm_lowcomms_shutdown();
+ }
+
dlm_callback_stop(ls);
remove_lockspace(ls);
@@ -880,7 +882,7 @@ int dlm_release_lockspace(void *lockspace, int force)
if (!error)
ls_count--;
if (!ls_count)
- threads_stop();
+ dlm_lowcomms_stop();
mutex_unlock(&ls_lock);
return error;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 372c34ff8594..166e36fcf3e4 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -102,6 +102,9 @@ struct listen_connection {
struct work_struct rwork;
};
+#define DLM_WQ_REMAIN_BYTES(e) (PAGE_SIZE - e->end)
+#define DLM_WQ_LENGTH_BYTES(e) (e->end - e->offset)
+
/* An entry waiting to be sent */
struct writequeue_entry {
struct list_head list;
@@ -116,6 +119,7 @@ struct writequeue_entry {
struct dlm_node_addr {
struct list_head list;
int nodeid;
+ int mark;
int addr_count;
int curr_addr_index;
struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
@@ -134,7 +138,7 @@ static DEFINE_SPINLOCK(dlm_node_addrs_spin);
static struct listen_connection listen_con;
static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
static int dlm_local_count;
-static int dlm_allow_conn;
+int dlm_allow_conn;
/* Work queues */
static struct workqueue_struct *recv_workqueue;
@@ -303,7 +307,8 @@ static int addr_compare(const struct sockaddr_storage *x,
}
static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
- struct sockaddr *sa_out, bool try_new_addr)
+ struct sockaddr *sa_out, bool try_new_addr,
+ unsigned int *mark)
{
struct sockaddr_storage sas;
struct dlm_node_addr *na;
@@ -331,6 +336,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
if (!na->addr_count)
return -ENOENT;
+ *mark = na->mark;
+
if (sas_out)
memcpy(sas_out, &sas, sizeof(struct sockaddr_storage));
@@ -350,7 +357,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out,
return 0;
}
-static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
+static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid,
+ unsigned int *mark)
{
struct dlm_node_addr *na;
int rv = -EEXIST;
@@ -364,6 +372,7 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
for (addr_i = 0; addr_i < na->addr_count; addr_i++) {
if (addr_compare(na->addr[addr_i], addr)) {
*nodeid = na->nodeid;
+ *mark = na->mark;
rv = 0;
goto unlock;
}
@@ -412,6 +421,7 @@ int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
new_node->nodeid = nodeid;
new_node->addr[0] = new_addr;
new_node->addr_count = 1;
+ new_node->mark = dlm_config.ci_mark;
list_add(&new_node->list, &dlm_node_addrs);
spin_unlock(&dlm_node_addrs_spin);
return 0;
@@ -519,6 +529,23 @@ int dlm_lowcomms_connect_node(int nodeid)
return 0;
}
+int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
+{
+ struct dlm_node_addr *na;
+
+ spin_lock(&dlm_node_addrs_spin);
+ na = find_node_addr(nodeid);
+ if (!na) {
+ spin_unlock(&dlm_node_addrs_spin);
+ return -ENOENT;
+ }
+
+ na->mark = mark;
+ spin_unlock(&dlm_node_addrs_spin);
+
+ return 0;
+}
+
static void lowcomms_error_report(struct sock *sk)
{
struct connection *con;
@@ -685,10 +712,7 @@ static void shutdown_connection(struct connection *con)
{
int ret;
- if (cancel_work_sync(&con->swork)) {
- log_print("canceled swork for node %d", con->nodeid);
- clear_bit(CF_WRITE_PENDING, &con->flags);
- }
+ flush_work(&con->swork);
mutex_lock(&con->sock_mutex);
/* nothing to shutdown */
@@ -867,7 +891,7 @@ static int accept_from_sock(struct listen_connection *con)
/* Get the new node's NODEID */
make_sockaddr(&peeraddr, 0, &len);
- if (addr_to_nodeid(&peeraddr, &nodeid)) {
+ if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
unsigned char *b=(unsigned char *)&peeraddr;
log_print("connect from non cluster node");
print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
@@ -876,9 +900,6 @@ static int accept_from_sock(struct listen_connection *con)
return -1;
}
- dlm_comm_mark(nodeid, &mark);
- sock_set_mark(newsock->sk, mark);
-
log_print("got connection from %d", nodeid);
/* Check to see if we already have a connection to this node. This
@@ -892,6 +913,8 @@ static int accept_from_sock(struct listen_connection *con)
goto accept_err;
}
+ sock_set_mark(newsock->sk, mark);
+
mutex_lock(&newcon->sock_mutex);
if (newcon->sock) {
struct connection *othercon = newcon->othercon;
@@ -908,16 +931,18 @@ static int accept_from_sock(struct listen_connection *con)
result = dlm_con_init(othercon, nodeid);
if (result < 0) {
kfree(othercon);
+ mutex_unlock(&newcon->sock_mutex);
goto accept_err;
}
+ lockdep_set_subclass(&othercon->sock_mutex, 1);
newcon->othercon = othercon;
} else {
/* close other sock con if we have something new */
close_connection(othercon, false, true, false);
}
- mutex_lock_nested(&othercon->sock_mutex, 1);
+ mutex_lock(&othercon->sock_mutex);
add_sock(newsock, othercon);
addcon = othercon;
mutex_unlock(&othercon->sock_mutex);
@@ -930,6 +955,7 @@ static int accept_from_sock(struct listen_connection *con)
addcon = newcon;
}
+ set_bit(CF_CONNECTED, &addcon->flags);
mutex_unlock(&newcon->sock_mutex);
/*
@@ -1015,8 +1041,6 @@ static void sctp_connect_to_sock(struct connection *con)
struct socket *sock;
unsigned int mark;
- dlm_comm_mark(con->nodeid, &mark);
-
mutex_lock(&con->sock_mutex);
/* Some odd races can cause double-connects, ignore them */
@@ -1029,7 +1053,7 @@ static void sctp_connect_to_sock(struct connection *con)
}
memset(&daddr, 0, sizeof(daddr));
- result = nodeid_to_addr(con->nodeid, &daddr, NULL, true);
+ result = nodeid_to_addr(con->nodeid, &daddr, NULL, true, &mark);
if (result < 0) {
log_print("no address for nodeid %d", con->nodeid);
goto out;
@@ -1104,13 +1128,11 @@ out:
static void tcp_connect_to_sock(struct connection *con)
{
struct sockaddr_storage saddr, src_addr;
+ unsigned int mark;
int addr_len;
struct socket *sock = NULL;
- unsigned int mark;
int result;
- dlm_comm_mark(con->nodeid, &mark);
-
mutex_lock(&con->sock_mutex);
if (con->retries++ > MAX_CONNECT_RETRIES)
goto out;
@@ -1125,15 +1147,15 @@ static void tcp_connect_to_sock(struct connection *con)
if (result < 0)
goto out_err;
- sock_set_mark(sock->sk, mark);
-
memset(&saddr, 0, sizeof(saddr));
- result = nodeid_to_addr(con->nodeid, &saddr, NULL, false);
+ result = nodeid_to_addr(con->nodeid, &saddr, NULL, false, &mark);
if (result < 0) {
log_print("no address for nodeid %d", con->nodeid);
goto out_err;
}
+ sock_set_mark(sock->sk, mark);
+
add_sock(sock, con);
/* Bind to our cluster-known address connecting to avoid
@@ -1330,70 +1352,72 @@ static struct writequeue_entry *new_writequeue_entry(struct connection *con,
{
struct writequeue_entry *entry;
- entry = kmalloc(sizeof(struct writequeue_entry), allocation);
+ entry = kzalloc(sizeof(*entry), allocation);
if (!entry)
return NULL;
- entry->page = alloc_page(allocation);
+ entry->page = alloc_page(allocation | __GFP_ZERO);
if (!entry->page) {
kfree(entry);
return NULL;
}
- entry->offset = 0;
- entry->len = 0;
- entry->end = 0;
- entry->users = 0;
entry->con = con;
+ entry->users = 1;
return entry;
}
-void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
+static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
+ gfp_t allocation, char **ppc)
{
- struct connection *con;
struct writequeue_entry *e;
- int offset = 0;
- if (len > LOWCOMMS_MAX_TX_BUFFER_LEN) {
- BUILD_BUG_ON(PAGE_SIZE < LOWCOMMS_MAX_TX_BUFFER_LEN);
- log_print("failed to allocate a buffer of size %d", len);
- return NULL;
+ spin_lock(&con->writequeue_lock);
+ if (!list_empty(&con->writequeue)) {
+ e = list_last_entry(&con->writequeue, struct writequeue_entry, list);
+ if (DLM_WQ_REMAIN_BYTES(e) >= len) {
+ *ppc = page_address(e->page) + e->end;
+ e->end += len;
+ e->users++;
+ spin_unlock(&con->writequeue_lock);
+
+ return e;
+ }
}
+ spin_unlock(&con->writequeue_lock);
- con = nodeid2con(nodeid, allocation);
- if (!con)
+ e = new_writequeue_entry(con, allocation);
+ if (!e)
return NULL;
+ *ppc = page_address(e->page);
+ e->end += len;
+
spin_lock(&con->writequeue_lock);
- e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
- if ((&e->list == &con->writequeue) ||
- (PAGE_SIZE - e->end < len)) {
- e = NULL;
- } else {
- offset = e->end;
- e->end += len;
- e->users++;
- }
+ list_add_tail(&e->list, &con->writequeue);
spin_unlock(&con->writequeue_lock);
- if (e) {
- got_one:
- *ppc = page_address(e->page) + offset;
- return e;
- }
+ return e;
+};
- e = new_writequeue_entry(con, allocation);
- if (e) {
- spin_lock(&con->writequeue_lock);
- offset = e->end;
- e->end += len;
- e->users++;
- list_add_tail(&e->list, &con->writequeue);
- spin_unlock(&con->writequeue_lock);
- goto got_one;
+void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
+{
+ struct connection *con;
+
+ if (len > DEFAULT_BUFFER_SIZE ||
+ len < sizeof(struct dlm_header)) {
+ BUILD_BUG_ON(PAGE_SIZE < DEFAULT_BUFFER_SIZE);
+ log_print("failed to allocate a buffer of size %d", len);
+ WARN_ON(1);
+ return NULL;
}
- return NULL;
+
+ con = nodeid2con(nodeid, allocation);
+ if (!con)
+ return NULL;
+
+ return new_wq_entry(con, len, allocation, ppc);
}
void dlm_lowcomms_commit_buffer(void *mh)
@@ -1406,7 +1430,8 @@ void dlm_lowcomms_commit_buffer(void *mh)
users = --e->users;
if (users)
goto out;
- e->len = e->end - e->offset;
+
+ e->len = DLM_WQ_LENGTH_BYTES(e);
spin_unlock(&con->writequeue_lock);
queue_work(send_workqueue, &con->swork);
@@ -1432,11 +1457,10 @@ static void send_to_sock(struct connection *con)
spin_lock(&con->writequeue_lock);
for (;;) {
- e = list_entry(con->writequeue.next, struct writequeue_entry,
- list);
- if ((struct list_head *) e == &con->writequeue)
+ if (list_empty(&con->writequeue))
break;
+ e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
len = e->len;
offset = e->offset;
BUG_ON(len == 0 && e->users == 0);
@@ -1589,6 +1613,29 @@ static int work_start(void)
return 0;
}
+static void shutdown_conn(struct connection *con)
+{
+ if (con->shutdown_action)
+ con->shutdown_action(con);
+}
+
+void dlm_lowcomms_shutdown(void)
+{
+ /* Set all the flags to prevent any
+ * socket activity.
+ */
+ dlm_allow_conn = 0;
+
+ if (recv_workqueue)
+ flush_workqueue(recv_workqueue);
+ if (send_workqueue)
+ flush_workqueue(send_workqueue);
+
+ dlm_close_sock(&listen_con.sock);
+
+ foreach_conn(shutdown_conn);
+}
+
static void _stop_conn(struct connection *con, bool and_other)
{
mutex_lock(&con->sock_mutex);
@@ -1610,12 +1657,6 @@ static void stop_conn(struct connection *con)
_stop_conn(con, true);
}
-static void shutdown_conn(struct connection *con)
-{
- if (con->shutdown_action)
- con->shutdown_action(con);
-}
-
static void connection_release(struct rcu_head *rcu)
{
struct connection *con = container_of(rcu, struct connection, rcu);
@@ -1672,19 +1713,6 @@ static void work_flush(void)
void dlm_lowcomms_stop(void)
{
- /* Set all the flags to prevent any
- socket activity.
- */
- dlm_allow_conn = 0;
-
- if (recv_workqueue)
- flush_workqueue(recv_workqueue);
- if (send_workqueue)
- flush_workqueue(send_workqueue);
-
- dlm_close_sock(&listen_con.sock);
-
- foreach_conn(shutdown_conn);
work_flush();
foreach_conn(free_conn);
work_stop();
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 0918f9376489..48bbc4e18761 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -14,13 +14,18 @@
#define LOWCOMMS_MAX_TX_BUFFER_LEN 4096
+/* switch to check if dlm is running */
+extern int dlm_allow_conn;
+
int dlm_lowcomms_start(void);
+void dlm_lowcomms_shutdown(void);
void dlm_lowcomms_stop(void);
void dlm_lowcomms_exit(void);
int dlm_lowcomms_close(int nodeid);
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
void dlm_lowcomms_commit_buffer(void *mh);
int dlm_lowcomms_connect_node(int nodeid);
+int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
#endif /* __LOWCOMMS_DOT_H__ */
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index fde3a6afe4be..1c6654a21ec4 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -22,8 +22,6 @@
* into packets and sends them to the comms layer.
*/
-#include <asm/unaligned.h>
-
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
@@ -45,13 +43,22 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
while (len >= sizeof(struct dlm_header)) {
hd = (struct dlm_header *)ptr;
- /* no message should be more than this otherwise we
- * cannot deliver this message to upper layers
+ /* no message should be more than DEFAULT_BUFFER_SIZE or
+ * less than dlm_header size.
+ *
+ * Some messages does not have a 8 byte length boundary yet
+ * which can occur in a unaligned memory access of some dlm
+ * messages. However this problem need to be fixed at the
+ * sending side, for now it seems nobody run into architecture
+ * related issues yet but it slows down some processing.
+ * Fixing this issue should be scheduled in future by doing
+ * the next major version bump.
*/
- msglen = get_unaligned_le16(&hd->h_length);
- if (msglen > DEFAULT_BUFFER_SIZE) {
- log_print("received invalid length header: %u, will abort message parsing",
- msglen);
+ msglen = le16_to_cpu(hd->h_length);
+ if (msglen > DEFAULT_BUFFER_SIZE ||
+ msglen < sizeof(struct dlm_header)) {
+ log_print("received invalid length header: %u from node %d, will abort message parsing",
+ msglen, nodeid);
return -EBADMSG;
}
@@ -84,15 +91,7 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
goto skip;
}
- /* for aligned memory access, we just copy current message
- * to begin of the buffer which contains already parsed buffer
- * data and should provide align access for upper layers
- * because the start address of the buffer has a aligned
- * address. This memmove can be removed when the upperlayer
- * is capable of unaligned memory access.
- */
- memmove(buf, ptr, msglen);
- dlm_receive_buffer((union dlm_packet *)buf, nodeid);
+ dlm_receive_buffer((union dlm_packet *)ptr, nodeid);
skip:
ret += msglen;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 73ddee5159d7..f5b1bd65728d 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -41,7 +41,6 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
to_nodeid, type, len);
return -ENOBUFS;
}
- memset(mb, 0, mb_len);
rc = (struct dlm_rcom *) mb;
@@ -462,7 +461,6 @@ int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_NOFS, &mb);
if (!mh)
return -ENOBUFS;
- memset(mb, 0, mb_len);
rc = (struct dlm_rcom *) mb;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 943e523f4c9d..e3f5d7f3c8a0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 1997-2004 Erez Zadok
@@ -296,10 +296,6 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
struct extent_crypt_result ecr;
int rc = 0;
- if (!crypt_stat || !crypt_stat->tfm
- || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
- return -EINVAL;
-
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
crypt_stat->key_size);
@@ -350,7 +346,7 @@ out:
return rc;
}
-/**
+/*
* lower_offset_for_page
*
* Convert an eCryptfs page index into a lower byte offset
@@ -535,7 +531,7 @@ int ecryptfs_decrypt_page(struct page *page)
rc = crypt_extent(crypt_stat, page, page,
extent_offset, DECRYPT);
if (rc) {
- printk(KERN_ERR "%s: Error encrypting extent; "
+ printk(KERN_ERR "%s: Error decrypting extent; "
"rc = [%d]\n", __func__, rc);
goto out;
}
@@ -627,9 +623,8 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
}
}
-/**
+/*
* ecryptfs_compute_root_iv
- * @crypt_stats
*
* On error, sets the root IV to all 0's.
*/
@@ -1370,7 +1365,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry,
return rc;
}
-/**
+/*
* ecryptfs_read_metadata
*
* Common entry point for reading file metadata. From here, we could
@@ -1448,7 +1443,7 @@ out:
return rc;
}
-/**
+/*
* ecryptfs_encrypt_filename - encrypt filename
*
* CBC-encrypts the filename. We do not want to encrypt the same
@@ -1590,11 +1585,10 @@ out:
struct kmem_cache *ecryptfs_key_tfm_cache;
static struct list_head key_tfm_list;
-struct mutex key_tfm_list_mutex;
+DEFINE_MUTEX(key_tfm_list_mutex);
int __init ecryptfs_init_crypto(void)
{
- mutex_init(&key_tfm_list_mutex);
INIT_LIST_HEAD(&key_tfm_list);
return 0;
}
@@ -1877,10 +1871,11 @@ out:
/**
* ecryptfs_encrypt_and_encode_filename - converts a plaintext file name to cipher text
- * @crypt_stat: The crypt_stat struct associated with the file anem to encode
+ * @encoded_name: The encrypted name
+ * @encoded_name_size: Length of the encrypted name
+ * @mount_crypt_stat: The crypt_stat struct associated with the file name to encode
* @name: The plaintext name
- * @length: The length of the plaintext
- * @encoded_name: The encypted name
+ * @name_size: The length of the plaintext name
*
* Encrypts and encodes a filename into something that constitutes a
* valid filename for a filesystem, with printable characters.
@@ -1992,7 +1987,7 @@ static bool is_dot_dotdot(const char *name, size_t name_size)
* ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext
* @plaintext_name: The plaintext name
* @plaintext_name_size: The plaintext name size
- * @ecryptfs_dir_dentry: eCryptfs directory dentry
+ * @sb: Ecryptfs's super_block
* @name: The filename in cipher text
* @name_size: The cipher text name size
*
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index 1f65e99f9a41..cf6d0e8e25a1 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
* Functions only useful for debugging.
*
@@ -9,7 +9,7 @@
#include "ecryptfs_kernel.h"
-/**
+/*
* ecryptfs_dump_auth_tok - debug function to print auth toks
*
* This function will print the contents of an ecryptfs authentication
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 44606f079efb..acaa0825e9bb 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 1997-2003 Erez Zadok
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index e6ac78c62ca4..5f2b49e13731 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -262,10 +262,7 @@ struct ecryptfs_inode_info {
* vfsmount too. */
struct ecryptfs_dentry_info {
struct path lower_path;
- union {
- struct ecryptfs_crypt_stat *crypt_stat;
- struct rcu_head rcu;
- };