126 lines
3.9 KiB
Diff
126 lines
3.9 KiB
Diff
From 71a6398a4b59ddcf920dfb68872b5a771c606e3a Mon Sep 17 00:00:00 2001
|
|
From: Dmitry Monakhov <dmonakhov@openvz.org>
|
|
Date: Sun, 30 Sep 2012 23:03:42 -0400
|
|
Subject: [PATCH 09/13] ext4: punch_hole should wait for DIO writers
|
|
|
|
punch_hole is the place where we have to wait for all existing writers
|
|
(writeback, aio, dio), but currently we simply flush pending end_io requests,
|
|
which is not sufficient. The other issue is that punch_hole is performed without i_mutex
|
|
held, which obviously results in dangerous data corruption due to
|
|
write-after-free.
|
|
|
|
This patch performs the following changes:
|
|
- Guard punch_hole with i_mutex
|
|
- Recheck inode flags under i_mutex
|
|
- Block all new dio readers in order to prevent an information leak caused by a
|
|
read-after-free pattern.
|
|
- punch_hole now waits for all writers in flight
|
|
NOTE: XXX a write-after-free race is still possible because new dirty pages
|
|
may appear due to mmap(), and currently there is no easy way to stop
|
|
writeback while punch_hole is in progress.
|
|
|
|
[ Fixed error return from ext4_ext_punch_hole() to make sure that we
|
|
release i_mutex before returning EPERM or ETXTBSY -- Ted ]
|
|
|
|
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
|
|
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
|
|
(cherry picked from commit 02d262dffcf4c74e5c4612ee736bdb94f18ed5b9)
|
|
---
|
|
fs/ext4/extents.c | 53 ++++++++++++++++++++++++++++++++++++-----------------
|
|
1 file changed, 36 insertions(+), 17 deletions(-)
|
|
|
|
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
|
|
index 1fbf2ff..202eb4d 100644
|
|
--- a/fs/ext4/extents.c
|
|
+++ b/fs/ext4/extents.c
|
|
@@ -4776,9 +4776,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
|
|
loff_t first_page_offset, last_page_offset;
|
|
int credits, err = 0;
|
|
|
|
+ /*
|
|
+ * Write out all dirty pages to avoid race conditions
|
|
+ * Then release them.
|
|
+ */
|
|
+ if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
|
|
+ err = filemap_write_and_wait_range(mapping,
|
|
+ offset, offset + length - 1);
|
|
+
|
|
+ if (err)
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ mutex_lock(&inode->i_mutex);
|
|
+ /* It's not possible punch hole on append only file */
|
|
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
|
|
+ err = -EPERM;
|
|
+ goto out_mutex;
|
|
+ }
|
|
+ if (IS_SWAPFILE(inode)) {
|
|
+ err = -ETXTBSY;
|
|
+ goto out_mutex;
|
|
+ }
|
|
+
|
|
/* No need to punch hole beyond i_size */
|
|
if (offset >= inode->i_size)
|
|
- return 0;
|
|
+ goto out_mutex;
|
|
|
|
/*
|
|
* If the hole extends beyond i_size, set the hole
|
|
@@ -4796,33 +4819,25 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
|
|
first_page_offset = first_page << PAGE_CACHE_SHIFT;
|
|
last_page_offset = last_page << PAGE_CACHE_SHIFT;
|
|
|
|
- /*
|
|
- * Write out all dirty pages to avoid race conditions
|
|
- * Then release them.
|
|
- */
|
|
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
|
|
- err = filemap_write_and_wait_range(mapping,
|
|
- offset, offset + length - 1);
|
|
-
|
|
- if (err)
|
|
- return err;
|
|
- }
|
|
-
|
|
/* Now release the pages */
|
|
if (last_page_offset > first_page_offset) {
|
|
truncate_pagecache_range(inode, first_page_offset,
|
|
last_page_offset - 1);
|
|
}
|
|
|
|
- /* finish any pending end_io work */
|
|
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
|
|
+ ext4_inode_block_unlocked_dio(inode);
|
|
+ inode_dio_wait(inode);
|
|
err = ext4_flush_completed_IO(inode);
|
|
if (err)
|
|
- return err;
|
|
+ goto out_dio;
|
|
|
|
credits = ext4_writepage_trans_blocks(inode);
|
|
handle = ext4_journal_start(inode, credits);
|
|
- if (IS_ERR(handle))
|
|
- return PTR_ERR(handle);
|
|
+ if (IS_ERR(handle)) {
|
|
+ err = PTR_ERR(handle);
|
|
+ goto out_dio;
|
|
+ }
|
|
|
|
err = ext4_orphan_add(handle, inode);
|
|
if (err)
|
|
@@ -4916,6 +4931,10 @@ out:
|
|
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
|
|
ext4_mark_inode_dirty(handle, inode);
|
|
ext4_journal_stop(handle);
|
|
+out_dio:
|
|
+ ext4_inode_resume_unlocked_dio(inode);
|
|
+out_mutex:
|
|
+ mutex_unlock(&inode->i_mutex);
|
|
return err;
|
|
}
|
|
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|
--
|
|
1.7.12.rc0.22.gcdd159b
|
|
|