Fix "ext4 and xfs wrong data returned on read after write if

file size was changed with ftruncate" (#617699)
This commit is contained in:
Chuck Ebbert 2010-08-04 10:52:15 -04:00
parent 4e7843a1f2
commit bb314e4958
4 changed files with 394 additions and 0 deletions

View File

@ -0,0 +1,204 @@
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 18 Jul 2010 21:17:09 +0000 (+0000)
Subject: direct-io: move aio_complete into ->end_io
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=40e2e97316af6e62affab7a392e792494b8d9dde
direct-io: move aio_complete into ->end_io
Filesystems with unwritten extent support must not complete an AIO request
until the transaction to convert the extent has been commited. That means
the aio_complete calls needs to be moved into the ->end_io callback so
that the filesystem can control when to call it exactly.
This makes a bit of a mess out of dio_complete and the ->end_io callback
prototype even more complicated.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 7600aac..a10cb91 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static int dio_complete(struct dio *dio, loff_t offset, int ret)
+static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
{
ssize_t transferred = 0;
@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
transferred = dio->i_size - offset;
}
- if (dio->end_io && dio->result)
- dio->end_io(dio->iocb, offset, transferred,
- dio->map_bh.b_private);
-
- if (dio->flags & DIO_LOCKING)
- /* lockdep: non-owner release */
- up_read_non_owner(&dio->inode->i_alloc_sem);
-
if (ret == 0)
ret = dio->page_errors;
if (ret == 0)
@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
if (ret == 0)
ret = transferred;
+ if (dio->end_io && dio->result) {
+ dio->end_io(dio->iocb, offset, transferred,
+ dio->map_bh.b_private, ret, is_async);
+ } else if (is_async) {
+ aio_complete(dio->iocb, ret, 0);
+ }
+
+ if (dio->flags & DIO_LOCKING)
+ /* lockdep: non-owner release */
+ up_read_non_owner(&dio->inode->i_alloc_sem);
+
return ret;
}
@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
- aio_complete(dio->iocb, ret, 0);
+ dio_complete(dio, dio->iocb->ki_pos, 0, true);
kfree(dio);
}
}
@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (ret2 == 0) {
- ret = dio_complete(dio, offset, ret);
+ ret = dio_complete(dio, offset, ret, false);
kfree(dio);
} else
BUG_ON(ret != -EIOCBQUEUED);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 42272d6..0afc8c1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
}
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
- ssize_t size, void *private)
+ ssize_t size, void *private, int ret,
+ bool is_async)
{
ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq;
@@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
/* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end || !size)
- return;
+ goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p"
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end);
iocb->private = NULL;
- return;
+ goto out;
}
io_end->offset = offset;
@@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
iocb->private = NULL;
+out:
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 356e976..96337a4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -578,7 +578,9 @@ bail:
static void ocfs2_dio_end_io(struct kiocb *iocb,
loff_t offset,
ssize_t bytes,
- void *private)
+ void *private,
+ int ret,
+ bool is_async)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
int level;
@@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
if (!level)
up_read(&inode->i_alloc_sem);
ocfs2_rw_unlock(inode, level);
+
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
/*
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 8abbf05..95d1e26 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1406,7 +1406,9 @@ xfs_end_io_direct(
struct kiocb *iocb,
loff_t offset,
ssize_t size,
- void *private)
+ void *private,
+ int ret,
+ bool is_async)
{
xfs_ioend_t *ioend = iocb->private;
@@ -1452,6 +1454,9 @@ xfs_end_io_direct(
* against double-freeing.
*/
iocb->private = NULL;
+
+ if (is_async)
+ aio_complete(iocb, ret, 0);
}
STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 319da17..c5057fb 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */
+ struct kiocb *io_iocb;
+ int io_result;
} xfs_ioend_t;
extern const struct address_space_operations xfs_address_space_operations;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 68ca1b0..f91affb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -415,7 +415,8 @@ struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
- ssize_t bytes, void *private);
+ ssize_t bytes, void *private, int ret,
+ bool is_async);
/*
* Attribute flags. These should be or-ed together to figure out what

View File

@ -0,0 +1,95 @@
From: jiayingz@google.com (Jiaying Zhang) <>
Date: Tue, 27 Jul 2010 15:56:06 +0000 (-0400)
Subject: ext4: move aio completion after unwritten extent conversion
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftytso%2Fext4.git;a=commitdiff_plain;h=5b3ff237bef43b9e7fb7d1eb858e29b73fd664f9
ext4: move aio completion after unwritten extent conversion
This patch is to be applied upon Christoph's "direct-io: move aio_complete
into ->end_io" patch. It adds iocb and result fields to struct ext4_io_end_t,
so that we can call aio_complete from ext4_end_io_nolock() after the extent
conversion has finished.
I have verified with Christoph's aio-dio test that used to fail after a few
runs on an original kernel but now succeeds on the patched kernel.
See http://thread.gmane.org/gmane.comp.file-systems.ext4/19659 for details.
Signed-off-by: Jiaying Zhang <jiayingz@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4c7d472..fbb3947 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -170,13 +170,15 @@ struct mpage_da_data {
};
#define EXT4_IO_UNWRITTEN 0x1
typedef struct ext4_io_end {
- struct list_head list; /* per-file finished AIO list */
+ struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
struct page *page; /* page struct for buffer write */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */
+ struct kiocb *iocb; /* iocb struct for AIO */
+ int result; /* error value for AIO */
} ext4_io_end_t;
/*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 609159e..46d2079 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3668,6 +3668,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
return ret;
}
+ if (io->iocb)
+ aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */
io->flag = 0;
return ret;
@@ -3767,6 +3769,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
io->offset = 0;
io->size = 0;
io->page = NULL;
+ io->iocb = NULL;
+ io->result = 0;
INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list);
}
@@ -3796,12 +3800,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end);
iocb->private = NULL;
- goto out;
+out:
+ if (is_async)
+ aio_complete(iocb, ret, 0);
+ return;
}
io_end->offset = offset;
io_end->size = size;
- io_end->flag = EXT4_IO_UNWRITTEN;
+ if (is_async) {
+ io_end->iocb = iocb;
+ io_end->result = ret;
+ }
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
@@ -3813,9 +3823,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
iocb->private = NULL;
-out:
- if (is_async)
- aio_complete(iocb, ret, 0);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)

View File

@ -758,6 +758,10 @@ Patch12410: cifs-fix-dns-resolver.patch
Patch12420: matroxfb-fix-font-corruption.patch
Patch12430: cred-dont-resurrect-dead-credentials.patch
Patch12440: direct-io-move-aio_complete-into-end_io.patch
Patch12450: ext4-move-aio-completion-after-unwritten-extent-conversion.patch
Patch12460: xfs-move-aio-completion-after-unwritten-extent-conversion.patch
%endif
BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root
@ -1406,6 +1410,12 @@ ApplyPatch matroxfb-fix-font-corruption.patch
# RHBZ #591015
ApplyPatch cred-dont-resurrect-dead-credentials.patch
# RHBZ #617699
ApplyPatch direct-io-move-aio_complete-into-end_io.patch
ApplyPatch ext4-move-aio-completion-after-unwritten-extent-conversion.patch
ApplyPatch xfs-move-aio-completion-after-unwritten-extent-conversion.patch
# END OF PATCH APPLICATIONS
%endif
@ -2031,6 +2041,8 @@ fi
- Drop ancient linux-2.6-mac80211-age-scan-results-on-resume.patch
- Fix matroxfb font corruption (#617687)
- Don't resurrect dead task credentials (#591015)
- Fix "ext4 and xfs wrong data returned on read after write if
file size was changed with ftruncate" (#617699)
* Sun Aug 01 2010 Chuck Ebbert <cebbert@redhat.com> 2.6.34.2-32.rc1
- Linux 2.6.34.2-rc1

View File

@ -0,0 +1,83 @@
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 18 Jul 2010 21:17:10 +0000 (+0000)
Subject: xfs: move aio completion after unwritten extent conversion
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=fb511f2150174b18b28ad54708c1adda0df39b17
xfs: move aio completion after unwritten extent conversion
If we write into an unwritten extent using AIO we need to complete the AIO
request after the extent conversion has finished. Without that a read could
race to see see the extent still unwritten and return zeros. For synchronous
I/O we already take care of that by flushing the xfsconvertd workqueue (which
might be a bit of overkill).
To do that add iocb and result fields to struct xfs_ioend, so that we can
call aio_complete from xfs_end_io after the extent conversion has happened.
Note that we need a new result field as io_error is used for positive errno
values, while the AIO code can return negative error values and positive
transfer sizes.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Alex Elder <aelder@sgi.com>
---
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 95d1e26..13622d5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -265,8 +265,11 @@ xfs_end_io(
xfs_finish_ioend(ioend, 0);
/* ensure we don't spin on blocked ioends */
delay(1);
- } else
+ } else {
+ if (ioend->io_iocb)
+ aio_complete(ioend->io_iocb, ioend->io_result, 0);
xfs_destroy_ioend(ioend);
+ }
}
/*
@@ -299,6 +302,8 @@ xfs_alloc_ioend(
atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
+ ioend->io_iocb = NULL;
+ ioend->io_result = 0;
INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend;
@@ -1411,6 +1416,7 @@ xfs_end_io_direct(
bool is_async)
{
xfs_ioend_t *ioend = iocb->private;
+ bool complete_aio = is_async;
/*
* Non-NULL private data means we need to issue a transaction to
@@ -1436,7 +1442,14 @@ xfs_end_io_direct(
if (ioend->io_type == IO_READ) {
xfs_finish_ioend(ioend, 0);
} else if (private && size > 0) {
- xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
+ if (is_async) {
+ ioend->io_iocb = iocb;
+ ioend->io_result = ret;
+ complete_aio = false;
+ xfs_finish_ioend(ioend, 0);
+ } else {
+ xfs_finish_ioend(ioend, 1);
+ }
} else {
/*
* A direct I/O write ioend starts it's life in unwritten
@@ -1455,7 +1468,7 @@ xfs_end_io_direct(
*/
iocb->private = NULL;
- if (is_async)
+ if (complete_aio)
aio_complete(iocb, ret, 0);
}