From 93862d5e1ab875664c6cc95254fc365028a48bb1 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jul 2011 14:58:15 -0700 Subject: ocfs2: Implement llseek() ocfs2 implements its own llseek() to provide the SEEK_HOLE/SEEK_DATA functionality. SEEK_HOLE sets the file pointer to the start of either a hole or an unwritten (preallocated) extent, that is greater than or equal to the supplied offset. SEEK_DATA sets the file pointer to the start of an allocated extent (not unwritten) that is greater than or equal to the supplied offset. If the supplied offset is on a desired region, then the file pointer is set to it. Offsets greater than or equal to the file size return -ENXIO. Unwritten (preallocated) extents are considered holes because the file system treats reads to such regions in the same way as it does to holes. Signed-off-by: Sunil Mushran --- fs/ocfs2/file.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0fc2bd34039d..c0f015e11c28 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2591,6 +2591,57 @@ bail: return ret; } +/* Refer generic_file_llseek_unlocked() */ +static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int origin) +{ + struct inode *inode = file->f_mapping->host; + int ret = 0; + + mutex_lock(&inode->i_mutex); + + switch (origin) { + case SEEK_SET: + break; + case SEEK_END: + offset += inode->i_size; + break; + case SEEK_CUR: + if (offset == 0) { + offset = file->f_pos; + goto out; + } + offset += file->f_pos; + break; + case SEEK_DATA: + case SEEK_HOLE: + ret = ocfs2_seek_data_hole_offset(file, &offset, origin); + if (ret) + goto out; + break; + default: + ret = -EINVAL; + goto out; + } + + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) + ret = -EINVAL; + if (!ret && offset > inode->i_sb->s_maxbytes) + ret = -EINVAL; + if (ret) + goto out; + + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + +out: + mutex_unlock(&inode->i_mutex); + if (ret) + return ret; + return offset; +} + const struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -2615,7 +2666,7 @@ const struct inode_operations ocfs2_special_file_iops = { * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! */ const struct file_operations ocfs2_fops = { - .llseek = generic_file_llseek, + .llseek = ocfs2_file_llseek, .read = do_sync_read, .write = do_sync_write, .mmap = ocfs2_mmap, @@ -2663,7 +2714,7 @@ const struct file_operations ocfs2_dops = { * the cluster. */ const struct file_operations ocfs2_fops_no_plocks = { - .llseek = generic_file_llseek, + .llseek = ocfs2_file_llseek, .read = do_sync_read, .write = do_sync_write, .mmap = ocfs2_mmap, -- cgit From a11f7e63c59810a81494d4c4b028af707d4c7ca4 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 22 Jun 2011 14:23:38 -0700 Subject: ocfs2: serialize unaligned aio Fix a corruption that can happen when we have (two or more) outstanding aio's to an overlapping unaligned region. Ext4 (e9e3bcecf44c04b9e6b505fd8e2eb9cea58fb94d) and xfs recently had to fix similar issues. In our case what happens is that we can have an outstanding aio on a region and if a write comes in with some bytes overlapping the original aio we may decide to read that region into a page before continuing (typically because of buffered-io fallback). Since we have no ordering guarantees with the aio, we can read stale or bad data into the page and then write it back out. If the i/o is page and block aligned, then we avoid this issue as there won't be any need to read data from disk. I took the same approach as Eric in the ext4 patch and introduced some serialization of unaligned async direct i/o. I don't expect this to have an effect on the most common cases of AIO. Unaligned aio will be slower though, but that's far more acceptable than data corruption. Signed-off-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/file.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index b1e35a392ca5..145f4533a936 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2038,6 +2038,23 @@ out: return ret; } +static void ocfs2_aiodio_wait(struct inode *inode) +{ + wait_queue_head_t *wq = ocfs2_ioend_wq(inode); + + wait_event(*wq, (atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0)); +} + +static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) +{ + int blockmask = inode->i_sb->s_blocksize - 1; + loff_t final_size = pos + count; + + if ((pos & blockmask) || (final_size & blockmask)) + return 1; + return 0; +} + static int ocfs2_prepare_inode_for_refcount(struct inode *inode, struct file *file, loff_t pos, size_t count, @@ -2216,6 +2233,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); + int unaligned_dio = 0; trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2284,6 +2302,10 @@ relock: goto out; } + if (direct_io && !is_sync_kiocb(iocb)) + unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_left, + *ppos); + /* * We can't complete the direct I/O as requested, fall back to * buffered I/O. @@ -2299,6 +2321,18 @@ relock: goto relock; } + if (unaligned_dio) { + /* + * Wait on previous unaligned aio to complete before + * proceeding. + */ + ocfs2_aiodio_wait(inode); + + /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */ + atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio); + ocfs2_iocb_set_unaligned_aio(iocb); + } + /* * To later detect whether a journal commit for sync writes is * necessary, we sample i_size, and cluster count here. @@ -2371,8 +2405,12 @@ out_dio: if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { rw_level = -1; have_alloc_sem = 0; + unaligned_dio = 0; } + if (unaligned_dio) + atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); + out: if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); -- cgit From df295d4a4b3c98af1a2445a82aef169e7e5d96b8 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 16 Nov 2011 12:03:10 -0800 Subject: ocfs2: honor O_(D)SYNC flag in fallocate We need to sync the transaction which updates i_size if the file is marked as needing sync semantics. Signed-off-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5c4a74e04ab4..09e3de57cdee 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1950,6 +1950,9 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, if (ret < 0) mlog_errno(ret); + if (file->f_flags & O_SYNC) + handle->h_sync = 1; + ocfs2_commit_trans(osb, handle); out_inode_unlock: -- cgit From 42b2aa86c6670347a2a07e6d7af0e0ecc8fdbff9 Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Mon, 28 Nov 2011 20:31:00 -0800 Subject: treewide: Fix typos in various parts of the kernel, and fix some comments. The below patch fixes some typos in various parts of the kernel, as well as fixes some comments. Please let me know if I missed anything, and I will try to get it changed and resent. Signed-off-by: Justin P. Mattock Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- fs/ocfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index de4ea1af041b..199c606c56a5 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2108,7 +2108,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * remove_suid() calls ->setattr without any hint that * we may have already done our cluster locking. Since * ocfs2_setattr() *must* take cluster locks to - * proceeed, this will lead us to recursively lock the + * proceed, this will lead us to recursively lock the * inode. There's also the dinode i_size state which * can be lost via setattr during extending writes (we * set inode->i_size at the end of a write. */ -- cgit