180 lines
6.2 KiB
Diff
180 lines
6.2 KiB
Diff
|
From: Dmitry Monakhov <dmonakhov@xxxxxxxx>
|
||
|
Date: Thu, 9 Oct 2014 15:14:47 +0400
|
||
|
Subject: [PATCH] ext4: fix race between write and fcntl(F_SETFL)
|
||
|
|
||
|
The O_DIRECT flag can be toggled via fcntl(F_SETFL).
|
||
|
But this value is checked twice, inside ext4_file_write_iter() and __generic_file_write_iter(),
|
||
|
which results in a BUG_ON (see the typical stack trace below).
|
||
|
In order to fix this we have to use our own copy of __generic_file_write_iter and
|
||
|
pass o_direct status explicitly.
|
||
|
|
||
|
TESTCASE: xfstest:generic/326 (http://patchwork.ozlabs.org/patch/397949/)
|
||
|
|
||
|
kernel BUG at fs/ext4/inode.c:2960!
|
||
|
invalid opcode: 0000 [#1] SMP
|
||
|
Modules linked in: brd iTCO_wdt lpc_ich mfd_core igb ptp dm_mirror dm_region_hash dm_log dm_mod
|
||
|
CPU: 6 PID: 5505 Comm: aio-dio-fcntl-r Not tainted 3.17.0-rc2-00176-gff5c017 #161
|
||
|
Hardware name: Intel Corporation W2600CR/W2600CR, BIOS SE5C600.86B.99.99.x028.061320111235 06/13/2011
|
||
|
task: ffff88080e95a7c0 ti: ffff88080f908000 task.ti: ffff88080f908000
|
||
|
RIP: 0010:[<ffffffff811fabf2>] [<ffffffff811fabf2>] ext4_direct_IO+0x162/0x3d0
|
||
|
RSP: 0018:ffff88080f90bb58 EFLAGS: 00010246
|
||
|
RAX: 0000000000000400 RBX: ffff88080fdb2a28 RCX: 00000000a802c818
|
||
|
RDX: 0000040000080000 RSI: ffff88080d8aeb80 RDI: 0000000000000001
|
||
|
RBP: ffff88080f90bbc8 R08: 0000000000000000 R09: 0000000000001581
|
||
|
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88080d8aeb80
|
||
|
R13: ffff88080f90bbf8 R14: ffff88080fdb28c8 R15: ffff88080fdb2a28
|
||
|
FS: 00007f23b2055700(0000) GS:ffff880818400000(0000) knlGS:0000000000000000
|
||
|
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
|
||
|
CR2: 00007f23b2045000 CR3: 000000080cedf000 CR4: 00000000000407e0
|
||
|
Stack:
|
||
|
ffff88080f90bb98 0000000000000000 7ffffffffffffffe ffff88080fdb2c30
|
||
|
0000000000000200 0000000000000200 0000000000000001 0000000000000200
|
||
|
ffff88080f90bbc8 ffff88080fdb2c30 ffff88080f90be08 0000000000000200
|
||
|
Call Trace:
|
||
|
[<ffffffff8112ca9d>] generic_file_direct_write+0xed/0x180
|
||
|
[<ffffffff8112f2b2>] __generic_file_write_iter+0x222/0x370
|
||
|
[<ffffffff811f495b>] ext4_file_write_iter+0x34b/0x400
|
||
|
[<ffffffff811bd709>] ? aio_run_iocb+0x239/0x410
|
||
|
[<ffffffff811bd709>] ? aio_run_iocb+0x239/0x410
|
||
|
[<ffffffff810990e5>] ? local_clock+0x25/0x30
|
||
|
[<ffffffff810abd94>] ? __lock_acquire+0x274/0x700
|
||
|
[<ffffffff811f4610>] ? ext4_unwritten_wait+0xb0/0xb0
|
||
|
[<ffffffff811bd756>] aio_run_iocb+0x286/0x410
|
||
|
[<ffffffff810990e5>] ? local_clock+0x25/0x30
|
||
|
[<ffffffff810ac359>] ? lock_release_holdtime+0x29/0x190
|
||
|
[<ffffffff811bc05b>] ? lookup_ioctx+0x4b/0xf0
|
||
|
[<ffffffff811bde3b>] do_io_submit+0x55b/0x740
|
||
|
[<ffffffff811bdcaa>] ? do_io_submit+0x3ca/0x740
|
||
|
[<ffffffff811be030>] SyS_io_submit+0x10/0x20
|
||
|
[<ffffffff815ce192>] system_call_fastpath+0x16/0x1b
|
||
|
Code: 01 48 8b 80 f0 01 00 00 48 8b 18 49 8b 45 10 0f 85 f1 01 00 00 48 03 45 c8 48 3b 43 48 0f 8f e3 01 00 00 49 83 7c 24 18 00 75 04 <0f> 0b eb fe f0 ff 83 ec 01 00 00 49 8b 44 24 18 8b 00 85 c0 89
|
||
|
RIP [<ffffffff811fabf2>] ext4_direct_IO+0x162/0x3d0
|
||
|
RSP <ffff88080f90bb58>
|
||
|
|
||
|
Upstream-status: Submitted but likely not accepted
|
||
|
Bugzilla: 1152608
|
||
|
|
||
|
Reported-by: Sasha Levin <sasha.levin@xxxxxxxxxx>
|
||
|
Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
|
||
|
---
|
||
|
fs/ext4/file.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
|
||
|
1 file changed, 95 insertions(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
|
||
|
index aca7b24a4432..8477eb259809 100644
|
||
|
--- a/fs/ext4/file.c
|
||
|
+++ b/fs/ext4/file.c
|
||
|
@@ -88,6 +88,100 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+/**
|
||
|
+ * __ext4_file_write_iter - __generic_file_write_iter copy, explicit O_DIRECT
|
||
|
+ * @iocb: IO state structure (file, offset, etc.)
|
||
|
+ * @from: iov_iter with data to write
|
||
|
+ * @direct: perform O_DIRECT IO
|
||
|
+ */
|
||
|
+static ssize_t
|
||
|
+__ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from, int direct)
|
||
|
+{
|
||
|
+ struct file *file = iocb->ki_filp;
|
||
|
+ struct address_space *mapping = file->f_mapping;
|
||
|
+ struct inode *inode = mapping->host;
|
||
|
+ loff_t pos = iocb->ki_pos;
|
||
|
+ ssize_t written = 0;
|
||
|
+ ssize_t err;
|
||
|
+ ssize_t status;
|
||
|
+ size_t count = iov_iter_count(from);
|
||
|
+
|
||
|
+ /* We can write back this queue in page reclaim */
|
||
|
+ current->backing_dev_info = mapping->backing_dev_info;
|
||
|
+ err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
|
||
|
+ if (err)
|
||
|
+ goto out;
|
||
|
+
|
||
|
+ if (count == 0)
|
||
|
+ goto out;
|
||
|
+
|
||
|
+ iov_iter_truncate(from, count);
|
||
|
+
|
||
|
+ err = file_remove_suid(file);
|
||
|
+ if (err)
|
||
|
+ goto out;
|
||
|
+
|
||
|
+ err = file_update_time(file);
|
||
|
+ if (err)
|
||
|
+ goto out;
|
||
|
+
|
||
|
+ /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
|
||
|
+ if (unlikely(direct)) {
|
||
|
+ loff_t endbyte;
|
||
|
+
|
||
|
+ written = generic_file_direct_write(iocb, from, pos);
|
||
|
+ if (written < 0 || written == count)
|
||
|
+ goto out;
|
||
|
+
|
||
|
+ /*
|
||
|
+ * direct-io write to a hole: fall through to buffered I/O
|
||
|
+ * for completing the rest of the request.
|
||
|
+ */
|
||
|
+ pos += written;
|
||
|
+ count -= written;
|
||
|
+
|
||
|
+ status = generic_perform_write(file, from, pos);
|
||
|
+ /*
|
||
|
+ * If generic_perform_write() returned a synchronous error
|
||
|
+ * then we want to return the number of bytes which were
|
||
|
+ * direct-written, or the error code if that was zero. Note
|
||
|
+ * that this differs from normal direct-io semantics, which
|
||
|
+ * will return -EFOO even if some bytes were written.
|
||
|
+ */
|
||
|
+ if (unlikely(status < 0)) {
|
||
|
+ err = status;
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+ iocb->ki_pos = pos + status;
|
||
|
+ /*
|
||
|
+ * We need to ensure that the page cache pages are written to
|
||
|
+ * disk and invalidated to preserve the expected O_DIRECT
|
||
|
+ * semantics.
|
||
|
+ */
|
||
|
+ endbyte = pos + status - 1;
|
||
|
+ err = filemap_write_and_wait_range(file->f_mapping, pos,
|
||
|
+ endbyte);
|
||
|
+ if (err == 0) {
|
||
|
+ written += status;
|
||
|
+ invalidate_mapping_pages(mapping,
|
||
|
+ pos >> PAGE_CACHE_SHIFT,
|
||
|
+ endbyte >> PAGE_CACHE_SHIFT);
|
||
|
+ } else {
|
||
|
+ /*
|
||
|
+ * We don't know how much we wrote, so just return
|
||
|
+ * the number of bytes which were direct-written
|
||
|
+ */
|
||
|
+ }
|
||
|
+ } else {
|
||
|
+ written = generic_perform_write(file, from, pos);
|
||
|
+ if (likely(written >= 0))
|
||
|
+ iocb->ki_pos = pos + written;
|
||
|
+ }
|
||
|
+out:
|
||
|
+ current->backing_dev_info = NULL;
|
||
|
+ return written ? written : err;
|
||
|
+}
|
||
|
+
|
||
|
static ssize_t
|
||
|
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||
|
{
|
||
|
@@ -172,7 +266,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- ret = __generic_file_write_iter(iocb, from);
|
||
|
+ ret = __ext4_file_write_iter(iocb, from, o_direct);
|
||
|
mutex_unlock(&inode->i_mutex);
|
||
|
|
||
|
if (ret > 0) {
|
||
|
--
|
||
|
1.9.3
|
||
|
|