diff --git a/direct-io-move-aio_complete-into-end_io.patch b/direct-io-move-aio_complete-into-end_io.patch new file mode 100644 index 000000000..b40487155 --- /dev/null +++ b/direct-io-move-aio_complete-into-end_io.patch @@ -0,0 +1,204 @@ +From: Christoph Hellwig +Date: Sun, 18 Jul 2010 21:17:09 +0000 (+0000) +Subject: direct-io: move aio_complete into ->end_io +X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=40e2e97316af6e62affab7a392e792494b8d9dde + +direct-io: move aio_complete into ->end_io + +Filesystems with unwritten extent support must not complete an AIO request +until the transaction to convert the extent has been commited. That means +the aio_complete calls needs to be moved into the ->end_io callback so +that the filesystem can control when to call it exactly. + +This makes a bit of a mess out of dio_complete and the ->end_io callback +prototype even more complicated. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Jan Kara +Signed-off-by: Alex Elder +--- + +diff --git a/fs/direct-io.c b/fs/direct-io.c +index 7600aac..a10cb91 100644 +--- a/fs/direct-io.c ++++ b/fs/direct-io.c +@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio) + * filesystems can use it to hold additional state between get_block calls and + * dio_complete. + */ +-static int dio_complete(struct dio *dio, loff_t offset, int ret) ++static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async) + { + ssize_t transferred = 0; + +@@ -239,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) + transferred = dio->i_size - offset; + } + +- if (dio->end_io && dio->result) +- dio->end_io(dio->iocb, offset, transferred, +- dio->map_bh.b_private); +- +- if (dio->flags & DIO_LOCKING) +- /* lockdep: non-owner release */ +- up_read_non_owner(&dio->inode->i_alloc_sem); +- + if (ret == 0) + ret = dio->page_errors; + if (ret == 0) +@@ -254,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) + if (ret == 0) + ret = transferred; + ++ if (dio->end_io && dio->result) { ++ dio->end_io(dio->iocb, offset, transferred, ++ dio->map_bh.b_private, ret, is_async); ++ } else if (is_async) { ++ aio_complete(dio->iocb, ret, 0); ++ } ++ ++ if (dio->flags & DIO_LOCKING) ++ /* lockdep: non-owner release */ ++ up_read_non_owner(&dio->inode->i_alloc_sem); ++ + return ret; + } + +@@ -277,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error) + spin_unlock_irqrestore(&dio->bio_lock, flags); + + if (remaining == 0) { +- int ret = dio_complete(dio, dio->iocb->ki_pos, 0); +- aio_complete(dio->iocb, ret, 0); ++ dio_complete(dio, dio->iocb->ki_pos, 0, true); + kfree(dio); + } + } +@@ -1126,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, + spin_unlock_irqrestore(&dio->bio_lock, flags); + + if (ret2 == 0) { +- ret = dio_complete(dio, offset, ret); ++ ret = dio_complete(dio, offset, ret, false); + kfree(dio); + } else + BUG_ON(ret != -EIOCBQUEUED); +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 42272d6..0afc8c1 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3775,7 +3775,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) + } + + static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, +- ssize_t size, void *private) ++ ssize_t size, void *private, int ret, ++ bool is_async) + { + ext4_io_end_t *io_end = iocb->private; + struct workqueue_struct *wq; +@@ -3784,7 +3785,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, + + /* if not async direct IO or dio with 0 bytes write, just return */ + if (!io_end || !size) +- return; ++ goto out; + + ext_debug("ext4_end_io_dio(): io_end 0x%p" + "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", +@@ -3795,7 +3796,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, + if (io_end->flag != EXT4_IO_UNWRITTEN){ + ext4_free_io_end(io_end); + iocb->private = NULL; +- return; ++ goto out; + } + + io_end->offset = offset; +@@ -3812,6 +3813,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, + list_add_tail(&io_end->list, &ei->i_completed_io_list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); + iocb->private = NULL; ++out: ++ if (is_async) ++ aio_complete(iocb, ret, 0); + } + + static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) +diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c +index 356e976..96337a4 100644 +--- a/fs/ocfs2/aops.c ++++ b/fs/ocfs2/aops.c +@@ -578,7 +578,9 @@ bail: + static void ocfs2_dio_end_io(struct kiocb *iocb, + loff_t offset, + ssize_t bytes, +- void *private) ++ void *private, ++ int ret, ++ bool is_async) + { + struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode; + int level; +@@ -592,6 +594,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, + if (!level) + up_read(&inode->i_alloc_sem); + ocfs2_rw_unlock(inode, level); ++ ++ if (is_async) ++ aio_complete(iocb, ret, 0); + } + + /* +diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c +index 8abbf05..95d1e26 100644 +--- a/fs/xfs/linux-2.6/xfs_aops.c ++++ b/fs/xfs/linux-2.6/xfs_aops.c +@@ -1406,7 +1406,9 @@ xfs_end_io_direct( + struct kiocb *iocb, + loff_t offset, + ssize_t size, +- void *private) ++ void *private, ++ int ret, ++ bool is_async) + { + xfs_ioend_t *ioend = iocb->private; + +@@ -1452,6 +1454,9 @@ xfs_end_io_direct( + * against double-freeing. + */ + iocb->private = NULL; ++ ++ if (is_async) ++ aio_complete(iocb, ret, 0); + } + + STATIC ssize_t +diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h +index 319da17..c5057fb 100644 +--- a/fs/xfs/linux-2.6/xfs_aops.h ++++ b/fs/xfs/linux-2.6/xfs_aops.h +@@ -37,6 +37,8 @@ typedef struct xfs_ioend { + size_t io_size; /* size of the extent */ + xfs_off_t io_offset; /* offset in the file */ + struct work_struct io_work; /* xfsdatad work queue */ ++ struct kiocb *io_iocb; ++ int io_result; + } xfs_ioend_t; + + extern const struct address_space_operations xfs_address_space_operations; +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 68ca1b0..f91affb 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -415,7 +415,8 @@ struct buffer_head; + typedef int (get_block_t)(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); + typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, +- ssize_t bytes, void *private); ++ ssize_t bytes, void *private, int ret, ++ bool is_async); + + /* + * Attribute flags. These should be or-ed together to figure out what diff --git a/ext4-move-aio-completion-after-unwritten-extent-conversion.patch b/ext4-move-aio-completion-after-unwritten-extent-conversion.patch new file mode 100644 index 000000000..7e2cb1702 --- /dev/null +++ b/ext4-move-aio-completion-after-unwritten-extent-conversion.patch @@ -0,0 +1,95 @@ +From: jiayingz@google.com (Jiaying Zhang) <> +Date: Tue, 27 Jul 2010 15:56:06 +0000 (-0400) +Subject: ext4: move aio completion after unwritten extent conversion +X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftytso%2Fext4.git;a=commitdiff_plain;h=5b3ff237bef43b9e7fb7d1eb858e29b73fd664f9 + +ext4: move aio completion after unwritten extent conversion + +This patch is to be applied upon Christoph's "direct-io: move aio_complete +into ->end_io" patch. It adds iocb and result fields to struct ext4_io_end_t, +so that we can call aio_complete from ext4_end_io_nolock() after the extent +conversion has finished. + +I have verified with Christoph's aio-dio test that used to fail after a few +runs on an original kernel but now succeeds on the patched kernel. + +See http://thread.gmane.org/gmane.comp.file-systems.ext4/19659 for details. + +Signed-off-by: Jiaying Zhang +Signed-off-by: "Theodore Ts'o" +--- + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 4c7d472..fbb3947 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -170,13 +170,15 @@ struct mpage_da_data { + }; + #define EXT4_IO_UNWRITTEN 0x1 + typedef struct ext4_io_end { +- struct list_head list; /* per-file finished AIO list */ ++ struct list_head list; /* per-file finished IO list */ + struct inode *inode; /* file being written to */ + unsigned int flag; /* unwritten or not */ + struct page *page; /* page struct for buffer write */ + loff_t offset; /* offset in the file */ + ssize_t size; /* size of the extent */ + struct work_struct work; /* data work queue */ ++ struct kiocb *iocb; /* iocb struct for AIO */ ++ int result; /* error value for AIO */ + } ext4_io_end_t; + + /* +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 609159e..46d2079 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3668,6 +3668,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io) + return ret; + } + ++ if (io->iocb) ++ aio_complete(io->iocb, io->result, 0); + /* clear the DIO AIO unwritten flag */ + io->flag = 0; + return ret; +@@ -3767,6 +3769,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) + io->offset = 0; + io->size = 0; + io->page = NULL; ++ io->iocb = NULL; ++ io->result = 0; + INIT_WORK(&io->work, ext4_end_io_work); + INIT_LIST_HEAD(&io->list); + } +@@ -3796,12 +3800,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, + if (io_end->flag != EXT4_IO_UNWRITTEN){ + ext4_free_io_end(io_end); + iocb->private = NULL; +- goto out; ++out: ++ if (is_async) ++ aio_complete(iocb, ret, 0); ++ return; + } + + io_end->offset = offset; + io_end->size = size; +- io_end->flag = EXT4_IO_UNWRITTEN; ++ if (is_async) { ++ io_end->iocb = iocb; ++ io_end->result = ret; ++ } + wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; + + /* queue the work to convert unwritten extents to written */ +@@ -3813,9 +3823,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, + list_add_tail(&io_end->list, &ei->i_completed_io_list); + spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); + iocb->private = NULL; +-out: +- if (is_async) +- aio_complete(iocb, ret, 0); + } + + static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) diff --git a/kernel.spec b/kernel.spec index 3c17c3d8c..1bcae334f 100644 --- a/kernel.spec +++ b/kernel.spec @@ -48,7 +48,7 @@ Summary: The Linux kernel # reset this by hand to 1 (or to 0 and then use rpmdev-bumpspec). # scripts/rebase.sh should be made to do that for you, actually. # -%global baserelease 2 +%global baserelease 3 %global fedora_build %{baserelease} # base_sublevel is the kernel version we're starting with and patching @@ -700,6 +700,11 @@ Patch12030: ssb_check_for_sprom.patch Patch12040: only-use-alpha2-regulatory-information-from-country-IE.patch +# rhbz #617699 +Patch12050: direct-io-move-aio_complete-into-end_io.patch +Patch12060: ext4-move-aio-completion-after-unwritten-extent-conversion.patch +Patch12070: xfs-move-aio-completion-after-unwritten-extent-conversion.patch + %endif BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root @@ -1287,6 +1292,11 @@ ApplyPatch neuter_intel_microcode_load.patch ApplyPatch only-use-alpha2-regulatory-information-from-country-IE.patch +# rhbz #617699 +ApplyPatch direct-io-move-aio_complete-into-end_io.patch +ApplyPatch ext4-move-aio-completion-after-unwritten-extent-conversion.patch +ApplyPatch xfs-move-aio-completion-after-unwritten-extent-conversion.patch + # END OF PATCH APPLICATIONS %endif @@ -1870,17 +1880,12 @@ fi # plz don't put in a version string unless you're going to tag # and build. -# ___________________________________________________________ -# / This branch is for Fedora 14. You probably want to commit \ -# \ to the F-13 branch instead, or in addition to this one. / -# ----------------------------------------------------------- -# \ ^__^ -# \ (@@)\_______ -# (__)\ )\/\ -# ||----w | -# || || - %changelog +* Fri Aug 06 2010 Chuck Ebbert 2.6.35-3 +- Copy fix for bug #617699 ("ext4 and xfs wrong data returned on read + after write if file size was changed with ftruncate") from F-13 +- Disable CONFIG_MULTICORE_RAID456 + * Wed Aug 04 2010 Dave Jones - sched: Revert nohz_ratelimit() which causes a lot of extra wakeups burning CPU, and my legs. diff --git a/xfs-move-aio-completion-after-unwritten-extent-conversion.patch b/xfs-move-aio-completion-after-unwritten-extent-conversion.patch new file mode 100644 index 000000000..290be17fd --- /dev/null +++ b/xfs-move-aio-completion-after-unwritten-extent-conversion.patch @@ -0,0 +1,83 @@ +From: Christoph Hellwig +Date: Sun, 18 Jul 2010 21:17:10 +0000 (+0000) +Subject: xfs: move aio completion after unwritten extent conversion +X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=fb511f2150174b18b28ad54708c1adda0df39b17 + +xfs: move aio completion after unwritten extent conversion + +If we write into an unwritten extent using AIO we need to complete the AIO +request after the extent conversion has finished. Without that a read could +race to see see the extent still unwritten and return zeros. For synchronous +I/O we already take care of that by flushing the xfsconvertd workqueue (which +might be a bit of overkill). + +To do that add iocb and result fields to struct xfs_ioend, so that we can +call aio_complete from xfs_end_io after the extent conversion has happened. +Note that we need a new result field as io_error is used for positive errno +values, while the AIO code can return negative error values and positive +transfer sizes. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Dave Chinner +Signed-off-by: Alex Elder +--- + +diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c +index 95d1e26..13622d5 100644 +--- a/fs/xfs/linux-2.6/xfs_aops.c ++++ b/fs/xfs/linux-2.6/xfs_aops.c +@@ -265,8 +265,11 @@ xfs_end_io( + xfs_finish_ioend(ioend, 0); + /* ensure we don't spin on blocked ioends */ + delay(1); +- } else ++ } else { ++ if (ioend->io_iocb) ++ aio_complete(ioend->io_iocb, ioend->io_result, 0); + xfs_destroy_ioend(ioend); ++ } + } + + /* +@@ -299,6 +302,8 @@ xfs_alloc_ioend( + atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); + ioend->io_offset = 0; + ioend->io_size = 0; ++ ioend->io_iocb = NULL; ++ ioend->io_result = 0; + + INIT_WORK(&ioend->io_work, xfs_end_io); + return ioend; +@@ -1411,6 +1416,7 @@ xfs_end_io_direct( + bool is_async) + { + xfs_ioend_t *ioend = iocb->private; ++ bool complete_aio = is_async; + + /* + * Non-NULL private data means we need to issue a transaction to +@@ -1436,7 +1442,14 @@ xfs_end_io_direct( + if (ioend->io_type == IO_READ) { + xfs_finish_ioend(ioend, 0); + } else if (private && size > 0) { +- xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); ++ if (is_async) { ++ ioend->io_iocb = iocb; ++ ioend->io_result = ret; ++ complete_aio = false; ++ xfs_finish_ioend(ioend, 0); ++ } else { ++ xfs_finish_ioend(ioend, 1); ++ } + } else { + /* + * A direct I/O write ioend starts it's life in unwritten +@@ -1455,7 +1468,7 @@ xfs_end_io_direct( + */ + iocb->private = NULL; + +- if (is_async) ++ if (complete_aio) + aio_complete(iocb, ret, 0); + } +